This chunk took minutes

0.1 Load packages

library(phyloseq)
library(ggord)
library(ggplot2)
library(viridis)
library(mgcViz)
library(dichromat)
library(ggpattern)
library(expss)
library(dplyr)
library(tidyverse)
library(GGally)
library(vegan)
library(ggpubr)
library(pheatmap)
library(lubridate)
library(ggsci)
library(RColorBrewer)
library(microbiome)
library(vegan)
library(lme4)
library(performance)
library(ggrepel)

library(glmmTMB)
library(mgcv)
library(gratia)
library(speedyseq)
library(ggeffects)
library(MuMIn)
library(effects)
library(broom)
library(cowplot)

library("coxme")
library(survival)

library(SpiecEasi)
library(NetCoMi)

memory.limit(1000000)
## [1] 1e+06

This chunk took 0.32 minutes

1 Data import and normalising

meerkat_final<-readRDS("C:\\Users\\risel\\Dropbox\\Sommer postdoc\\Meerkat project\\PROJECTS\\MeerkatSpikeInData\\Analysis updated\\6. TB AND MB POPULATION LEVEL\\Published data and code\\processed_data.RDS")

names(sample_data(meerkat_final))
##  [1] "feature.id"          "IndividID"           "SampleDate"         
##  [4] "SampleTime"          "Storage"             "Seq_run"            
##  [7] "Imtechella"          "Allobacillus"        "Seq_depth"          
## [10] "scaling_factor"      "Seq_depth_nospikein" "BirthDate"          
## [13] "DeathDate"           "AgeAtSampling"       "GroupAtSampling"    
## [16] "Condition_weight"    "SocialStatus"        "Temp_max"           
## [19] "sum_rainfall_month"  "HoursAfterSunrise"   "TimeCat"            
## [22] "Year"                "month"               "Season"             
## [25] "TB_status"           "TB_exposure"         "TB_resistance"      
## [28] "TB_survival"         "TB_symptom_date"     "TB_exposure_date"   
## [31] "TB_stage"
asv_table<-data.frame(meerkat_final@otu_table@.Data)
asv_table[1:5,1:5]
names(asv_table)<-sample_data(meerkat_final)$feature.id
meerkat_normalized<-sweep(asv_table, MARGIN=2, sample_data(meerkat_final)$scaling_factor, `*`)
meerkat_normalized<-round(meerkat_normalized, digits = 0)
meerkat_normalized<-otu_table(meerkat_normalized, taxa_are_rows = TRUE)
meerkat_scaled<-meerkat_final
otu_table(meerkat_scaled)<-meerkat_normalized


### add total abundance of reads to dataframe
sample_data(meerkat_scaled)$TotalAbundance<-sample_sums(meerkat_scaled)

meerkat_scaled
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 26122 taxa and 1142 samples ]:
## sample_data() Sample Data:        [ 1142 samples by 32 sample variables ]:
## tax_table()   Taxonomy Table:     [ 26122 taxa by 7 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 26122 tips and 26121 internal nodes ]:
## taxa are rows
#Filter samples

meerkat_filtered<-subset_samples(meerkat_scaled, Seq_depth_nospikein >3000)
meerkat_filtered<-subset_samples(meerkat_filtered, sample_sums(meerkat_filtered)>1000)

meerkat_filtered
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 26122 taxa and 1141 samples ]:
## sample_data() Sample Data:        [ 1141 samples by 32 sample variables ]:
## tax_table()   Taxonomy Table:     [ 26122 taxa by 7 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 26122 tips and 26121 internal nodes ]:
## taxa are rows

This chunk took 0.35 minutes

2 Fig. S1: Sampling design

ggplot(data=sample_data(meerkat_filtered),aes(x=SampleDate,y=reorder(IndividID, SampleDate),group=IndividID))+
  geom_line(size=0.1,alpha=0.5, col = "darkgrey")+
  geom_point(size=2, aes(fill = TB_stage), pch = 21)+
   geom_point(data = subset(sample_data(meerkat_filtered), TB_stage == "Diseased"),size=2,  fill = "brown2", pch = 21)+
  scale_fill_manual(values = c("forestgreen", "skyblue", "brown2"))+
  theme_bw(base_size = 14)+
  theme(strip.background = element_blank(), strip.text = element_blank())+
  theme(panel.grid.minor=element_blank(),panel.grid.major=element_blank(),
        panel.background=element_blank())+
  theme(axis.text=element_blank(),axis.title.x = element_blank(),
        axis.ticks.y=element_blank())+
  scale_x_date(date_breaks = "5 year")+
  ylab("Meerkat ID")+
   geom_vline(xintercept = as.Date("2000-01-01"), linetype = "dashed", col = "grey")+
  geom_vline(xintercept = as.Date("2005-01-01"), linetype = "dashed", col = "grey")+
  geom_vline(xintercept = as.Date("2010-01-01"), linetype = "dashed", col = "grey")+
  geom_vline(xintercept = as.Date("2015-01-01"), linetype = "dashed", col = "grey")+
  geom_vline(xintercept = as.Date("2020-01-01"), linetype = "dashed", col = "grey")+
  coord_cartesian(xlim = c(as.Date("1997-01-01"), as.Date("2020-01-01")))+
  theme(plot.margin=unit(c(0.2,0.3,0,0.2),"cm"))+
   guides(fill = guide_legend(override.aes = list(size = 3)))

This chunk took 0.04 minutes

4 Fig. 2e-i: Gut microbiome and metadata summary

meerkat_class<-tax_glom(meerkat_filtered, taxrank = "Class")

sample_data(meerkat_class)<-sample_data(meerkat_class)[,c("feature.id")]
meerkat_class<-microbiome::transform(meerkat_class, "compositional") 
class_per_sample<-psmelt(meerkat_class)

top_class<-c("Bacteroidia","Fusobacteriia" ,  "Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli")


class_per_sample$Class_plot<-as.character(ifelse(class_per_sample$Class %in% top_class, class_per_sample$Class, "Other"))

class_per_sample %>% group_by(Class) %>% summarize(mean = mean(Abundance)) %>% arrange(-mean)
class_per_sample$Class_plot<-factor(class_per_sample$Class_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli"))


metadata<-data.frame(sample_data(meerkat_filtered))
names(metadata)
##  [1] "feature.id"          "IndividID"           "SampleDate"         
##  [4] "SampleTime"          "Storage"             "Seq_run"            
##  [7] "Imtechella"          "Allobacillus"        "Seq_depth"          
## [10] "scaling_factor"      "Seq_depth_nospikein" "BirthDate"          
## [13] "DeathDate"           "AgeAtSampling"       "GroupAtSampling"    
## [16] "Condition_weight"    "SocialStatus"        "Temp_max"           
## [19] "sum_rainfall_month"  "HoursAfterSunrise"   "TimeCat"            
## [22] "Year"                "month"               "Season"             
## [25] "TB_status"           "TB_exposure"         "TB_resistance"      
## [28] "TB_survival"         "TB_symptom_date"     "TB_exposure_date"   
## [31] "TB_stage"            "TotalAbundance"
metadata<-metadata[,c("feature.id", "SampleDate", "Year", "Storage", "HoursAfterSunrise", "TotalAbundance", "TB_stage", "Season", "Temp_max",  "sum_rainfall_month", "sum_rainfall_month", "Condition_weight", "GroupAtSampling")]

class_per_sample<- merge(class_per_sample, metadata, by = "feature.id")

## microbiome plot


pal<-brewer.pal(10,"Paired")
pal<-c(pal, "lightgrey")
pal<-pal[c(2,1,11,4,3,6,9,10)]
pal<-rev(pal)




plot_per_sample<-ggplot(class_per_sample, aes(y = Abundance, x = reorder(Sample, SampleDate), fill = Class_plot))+
  geom_col(position = "fill", stat="identity", size = 0)+ 
  scale_fill_manual(values = rev(pal))+
  ylab("Relative abundance")+
  theme(axis.text.x = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank())+
   scale_y_continuous(expand = c(0,0)) +
  labs(fill = "Bacterial class")+
   theme(plot.margin=unit(c(0.2,0.2,0.2,1),"cm"))+
  theme(legend.justification = c("left", "center"))

ggplot(class_per_sample, aes(y = Abundance, x = reorder(Sample, SampleDate), fill = Class_plot))+
  facet_grid(~Year, scales = "free")+
  geom_col(position = "fill", stat="identity", size = 0)+ 
  scale_fill_manual(values = rev(pal))+
  ylab("Relative abundance")+
  theme(axis.text.x = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank())+
   scale_y_continuous(expand = c(0,0)) +
  labs(fill = "Bacterial class")+
   theme(plot.margin=unit(c(0.2,0.2,0.2,1),"cm"))+
  theme(legend.justification = c("left", "center"))

## TB stage
levels(class_per_sample$TB_stage)
## [1] "Unexposed" "Exposed"   "Diseased"
levels(class_per_sample$TB_stage)<-c("TB unexposed", "TB exposed", "TB diseased")
#levels(class_per_sample$TB_stage_weight)<-c("TB unexposed", "TB exposed", "TB diseased")

table(class_per_sample$TB_stage)
## 
## TB unexposed   TB exposed  TB diseased 
##        43363        64890         9270
plot_TB_sample<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = TB_stage))+
  geom_tile()+
  theme_void()+
scale_fill_manual(values = c( "forestgreen", "skyblue","brown2"))+ 
  theme(axis.title.y = element_text(angle = 0))+
  ylab("TB")+
  theme(legend.position="right", legend.direction="vertical", 
       legend.justification = c("left", "top"),
        legend.key.height = unit(0.3, 'cm'),
        legend.title=element_blank())


# temp max

pal_dichromat<-dichromat::colorschemes$BluetoOrange.12

pal_dichromat[13]<-"black"
pal_dichromat[14]<-"black"


plot_temp1<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Temp_max))+geom_tile()+
  theme_void()+
 scale_fill_gradientn(colours = pal_dichromat)+
  theme(legend.position="right", legend.direction="vertical", 
         legend.justification = c("left", "top"),
        legend.key.height = unit(0.15, 'cm'),
        legend.title=element_blank()) +
  theme(axis.title.y = element_text(angle = 0))+
  ylab(" Max \ntemp")

## season

plot_season<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Season))+
  geom_tile()+
  theme_void()+
  scale_fill_manual(values = c("gold", "forestgreen"))+
  theme(legend.position="right", legend.direction="vertical", 
        legend.justification = c("left", "top"),
        legend.key.height = unit(0.1, 'cm'),
        legend.title=element_blank())+
   theme(axis.title.y = element_text(angle = 0))+
  ylab("Season")


# year

year_names <- c(
                    `1997` = "",
                    `1998` = "",
                    `1999` = "1999",
                    `2000` = "",
                    `2001` = "2001",
                    `2002` = "2002",
                    `2003` = "2003",
                    `2004` = "2004",
                    `2005` = "2005",
                    `2006` = "2006",
                    `2007` = "2007",
                    `2008` = "2008",
                    `2009` = "2009",
                    `2010` = "2010",
                    `2011` = "2011",
                    `2012` = "2012",
                    `2013` = "2013",
                    `2014` = "2014",
                    `2015` = "2015",
                    `2016` = "2016",
                    `2017` = "2017",
                    `2018` = "2018",
                    `2019` = "")


# 13 x 6

# ####methods 
# ####methods 
# ####methods 
# ####methods 

drought_years<- c(1997, 1998, 2000, 2006, 2013, 2014, 2015, 2017, 2018)

class_per_sample$Drought<-ifelse(class_per_sample$Year %in% drought_years, "Drought", "Normal")

drought_cols<- c("red", "red", "skyblue", "red", "skyblue", # 1997 - 2001
                 "skyblue","skyblue","skyblue","skyblue", "red", # 2002 - 2006
                 "skyblue","skyblue","skyblue","skyblue","skyblue", # 2007 - 2011
                 "red", "skyblue", "red", "red", "skyblue", # 2012 - 2016
                 "red", "red" , "skyblue" ) # 2017 - 2019




plot_year<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Drought))+
  geom_tile()+
  theme_void()+
 # scale_fill_viridis(discrete = F)+ 
  scale_fill_manual(values = c("red", "skyblue"))+
  facet_grid(~Year, scales = "free", space = "free_x",switch = "x", labeller = as_labeller(year_names))+

  theme(panel.spacing = unit(0.07, "lines"), 
         strip.background = element_blank(),
         strip.placement = "outside",
         strip.text.x = element_text(angle = 90, hjust = 0.5)) +
  
   xlab("")+
    theme(axis.title.y = element_text(angle = 0, hjust = ))+
  ylab("Year          ")+
  theme(axis.title.x = element_text(colour = "black"))+
  theme(plot.margin=unit(c(0,2.8,0.2,1.55),"cm")) +
    theme(legend.position="right", legend.direction="vertical", 
        legend.justification = c("left", "top"),
        legend.key.height = unit(0.2, 'cm'),
        legend.title=element_blank())



# rainfall

plot_rainfall<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = log10(sum_rainfall_month+1)))+geom_tile()+
  theme_void()+
 scale_fill_gradientn(colours = pal_dichromat[1:12])+
   theme(legend.position="right", legend.direction="vertical", 
        legend.justification = c("left", "top"),
        legend.key.height = unit(0.2, 'cm'),
        legend.title=element_blank())+
   theme(axis.title.y = element_text(angle = 0))+
  ylab("Rainfall")

## body condition

#scales::show_col(pal_dichromat_cond)

pal_dichromat_cond<-c("black", "black", "black", pal_dichromat[1:12])

pal_dichromat_cond[8]<-"white"
pal_dichromat_cond[9]<-"white"
pal_dichromat_cond[10]<-"white"

pal_dichromat_cond[16]<-"red"

plot_condition<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Condition_weight))+
  geom_tile()+
  theme_void()+
 scale_fill_gradientn(colours = pal_dichromat_cond)+
   theme(legend.position="right", legend.direction="vertical", 
        legend.justification = c("left", "top"),
        legend.key.height = unit(0.2, 'cm'),
        legend.title=element_blank())+
   theme(axis.title.y = element_text(angle = 0))+
  ylab("Condition")


## without storage


sup_figabc<-ggarrange(plot_per_sample, plot_TB_sample, plot_condition, plot_temp1, plot_rainfall, plot_season, ncol = 1, align = "v", heights = c(8,1,1, 1,  1,1, 1, 1.5), labels= c("a)", "b)", "c)", "d)", "e)", "f)"))

ggarrange(sup_figabc, plot_year, ncol = 1, heights = c(7,1.1), labels = c(NA, "g)"))

# fig 2

#ggsave("Figures/Fig2.pdf")
# Saving 16.4 x 7.97 in image

This chunk took 0.41 minutes

5 Fig. S2: Storage, time of day, and social group per sample

# social group

group_freq<-data.frame(table(class_per_sample$GroupAtSampling))

group_freq<-group_freq %>% arrange(-Freq)
head(group_freq, 10)
top_groups<-group_freq$Var1[1:15]

class_per_sample$GroupAtSampling_plot<-ifelse(class_per_sample$GroupAtSampling %in% top_groups, as.character(class_per_sample$GroupAtSampling), "Other")

mypal =     pal_lancet("lanonc", alpha = 1)(7)
mypal5<- pal_rickandmorty("schwifty")(8)
mypal6<-brewer.pal(12,"Paired")
mypal7<-brewer.pal(8,"Dark2")
mypalx<-c(mypal6, mypal7)


class_per_sample$GroupAtSampling_plot<-factor(class_per_sample$GroupAtSampling_plot)
levels(class_per_sample$GroupAtSampling_plot)
##  [1] "Aztecs"      "Baobab"      "Commandos"   "Drie Doring" "Elveera"    
##  [6] "Ewoks"       "Lazuli"      "Moomins"     "Other"       "Rascals"    
## [11] "Van Helsing" "Vivian"      "Whiskers"    "Young Ones"  "Zappa"      
## [16] "Zulus"
mypalx[9]<-"grey"



plot_group<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = GroupAtSampling_plot))+
  geom_tile()+
  theme_void()+
   scale_fill_manual(values = mypalx)+
   theme(axis.title.y = element_text(angle = 0))+
  ylab("Group")+
  labs(fill = "Group")+
  guides(fill=guide_legend(ncol=2))


# storage

plot_storage<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Storage))+
  geom_tile()+
 theme_void()+
 # theme_bw()+
  scale_fill_manual(values = c("skyblue", "navyblue"), guide = guide_legend(reverse = TRUE))+ 
  theme(legend.position="right", legend.direction="vertical", 
         legend.key.height = unit(0.2, 'cm'),
        legend.justification = c("left", "top"), legend.title=element_blank())+
   theme(plot.margin=unit(c(0,0.2,0,0.2),"cm"))+
   theme(axis.title.y = element_text(angle = 0))+
  ylab("Storage")

# time

plot_time<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = HoursAfterSunrise))+geom_tile()+
  theme_void()+
 scale_fill_viridis_c(option = "plasma", direction= -1)+
   theme(legend.position="right", legend.direction="horizontal", 
        legend.justification = c("left", "top"),
        legend.key.height = unit(0.2, 'cm'),
        legend.title=element_blank())+
   theme(axis.title.y = element_text(angle = 0))+
  ylab("Time \nof day")

ggarrange(plot_per_sample,  plot_group,plot_time, plot_storage, ncol = 1, align = "v", heights = c(8,1,1, 1), labels= c("a)", "b)", "c)", "d)"), legend = F)

This chunk took 0.14 minutes

6 Fig S3. Visualising raw data per individual

meerkat_merged_ID<-merge_samples(meerkat_filtered,"IndividID", fun=mean)
sample_data(meerkat_merged_ID)$IndividID<-sample_names(meerkat_merged_ID)
meerkat_merged_ID_class<-tax_glom(meerkat_merged_ID, taxrank = "Class")
sample_data(meerkat_merged_ID_class)<-sample_data(meerkat_merged_ID_class)[,"IndividID"]
meerkat_merged_ID_class<-microbiome::transform(meerkat_merged_ID_class, "compositional") 
class_per_ID<-psmelt(meerkat_merged_ID_class)

class_abundance<-class_per_ID %>% group_by(Class) %>% summarise(mean = mean(Abundance))
class_abundance<-class_abundance[order(-class_abundance$mean),]
top_class<-class_abundance$Class[1:7]

class_per_ID$Class_plot<-as.character(ifelse(class_per_ID$Class %in% top_class, class_per_ID$Class, "Other"))


class_per_ID$Class_plot<-factor(class_per_ID$Class_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli"))

## order by birthyear #####
## order by birthyear #####
## order by birthyear #####
## order by birthyear #####

metadata<-data.frame(sample_data(meerkat_filtered))
metadata$BirthYear<-lubridate::year(sample_data(meerkat_filtered)$BirthDate)
metadata<-metadata[,c("feature.id", "IndividID", "TB_status", "BirthYear","BirthDate")]
head(metadata)
metadata2<-distinct(metadata, IndividID, .keep_all = T)
metadata2<-metadata2 %>% arrange(BirthDate)
head(metadata2)
ID_order<-metadata2$IndividID


class_per_ID<-merge(class_per_ID, metadata2, by.y = "IndividID", by.x = "Sample")

class_per_ID$TB_status<-factor(class_per_ID$TB_status, levels = c("unexposed", "exposed_asymptomatic", "exposed_symptomatic"))

levels(class_per_ID$TB_status)<-c("TB unexposed", "TB exposed", "TB diseased")

metadata2$IndividID<-factor(metadata2$IndividID, levels =ID_order)
class_per_ID$IndividID<-factor(class_per_ID$IndividID, levels =ID_order)


#class_per_ID<-class_per_ID%>% group_by(IndividID, Class_plot)%>%summarize(Abundance = sum(Abundance))
class_per_ID<-data.frame(class_per_ID)

#########################
#########################
#########################
#########################

microbiome_barplot <- ggplot(class_per_ID, aes(x = IndividID, y = Abundance, fill = Class_plot, col = Class_plot))+
  theme_gray(base_size = 14)+
  geom_bar( stat="identity", width = 0.5)+ 
  scale_fill_manual(values = rev(pal))+
  scale_color_manual(values = rev(pal), guide = "none")+
  theme(legend.position = "right")+
  theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank())+
   theme(plot.margin=unit(c(0.4,0,0,0),"cm"))+
     scale_y_continuous(expand = c(0,0)) +
  labs(fill = "Bacterial class")+
  theme(legend.justification = c("left", "center"))


######## TB 

p2<-ggplot(metadata2, aes(x = IndividID, y = 1, fill = TB_status))+
  geom_tile(col = "white")+
  theme_void(base_size = 14)+
  scale_fill_manual(values = c("forestgreen", "cyan3", "brown2"))+
  theme(legend.position = "right", legend.justification = "left") +
  theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank())+
  theme(axis.text.y = element_blank())+
      theme(axis.title.y = element_text(angle = 90, vjust=0.5))+
  ylab("TB \nstatus")+
   theme(plot.margin=unit(c(0,0,0,0.1),"cm"))+
  labs(fill = "TB status")



######## birth year




p3<-ggplot(metadata2, aes(y = 1, x = IndividID, fill = BirthYear))+
  geom_tile()+
  theme_void(base_size = 14)+
  scale_fill_viridis(discrete = F)+ 
  facet_grid(~BirthYear, scales = "free", space = "free_x",switch = "x")+

  theme(panel.spacing = unit(0.05, "lines"), 
         strip.background = element_blank(),
         strip.placement = "outside",
         strip.text.x = element_text(angle = 90, size = , hjust = 0.5)) +
  
   xlab("")+
    theme(axis.title.y = element_text(angle = 90, vjust=9))+
  ylab("Birth \nyear")+
  theme(axis.title.x = element_text(colour = "black"))+
   theme(plot.margin=unit(c(0, 5.7, 0.2,1.1),"cm"))+
  theme(legend.position="none") 



figsxab<-ggarrange(microbiome_barplot,p2, ncol = 1, align = "v", heights = c(2,0.5))

ggarrange(figsxab, p3, ncol = 1, heights = c(3,0.8))

#ggsave("Figures/FigS6.pdf")
# Saving 16.9 x 7.97 in image

This chunk took 0.1 minutes

  • changes in relative abundances of Bacteroidia and Bacilli
bacteroidia_2005<-subset(class_per_ID, Class_plot == "Bacteroidia" & BirthYear <2006 )
bacteroidia_2014<-subset(class_per_ID, Class_plot == "Bacteroidia"& BirthYear >2013 )

length(unique(bacteroidia_2005$IndividID))
## [1] 84
length(unique(bacteroidia_2014$IndividID))
## [1] 83
summary(bacteroidia_2005$Abundance)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.004754 0.017053 0.037109 0.048031 0.255336
sd(bacteroidia_2005$Abundance)
## [1] 0.04815908
summary(bacteroidia_2014$Abundance)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000689 0.048280 0.099646 0.100043 0.141796 0.327672
sd(bacteroidia_2014$Abundance)
## [1] 0.06791717
bacilli_2005<-subset(class_per_ID, Class_plot == "Bacilli" & BirthYear <2006 )
bacilli_2014<-subset(class_per_ID, Class_plot == "Bacilli"& BirthYear >2014 )

summary(bacilli_2005$Abundance)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.006604 0.062771 0.095245 0.131469 0.163677 0.765236
sd(bacilli_2005$Abundance)
## [1] 0.1176245
summary(bacilli_2014$Abundance)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.009551 0.033834 0.052679 0.074724 0.103429 0.326442
sd(bacilli_2014$Abundance)
## [1] 0.06493677

This chunk took 0 minutes

7 Figs S4, S5 and S9. Split by storage, season and group

storage_ps_class<-tax_glom(meerkat_filtered, taxrank = "Class")

storage_ps_class<-core(storage_ps_class, detection = 10, prevalence = 0)

sample_data(storage_ps_class)<-sample_data(storage_ps_class)[,c("feature.id", "Storage", "IndividID", "Year", "TimeCat")]
class_per_storage<-psmelt(storage_ps_class)

class_per_storage$Class_plot<-as.character(ifelse(class_per_storage$Class %in% top_class, class_per_storage$Class, "Other"))



class_per_storage$Class_plot<-factor(class_per_storage$Class_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli"))


class_per_storage$Storage<-factor(class_per_storage$Storage, levels = c("FROZEN", "FREEZEDRIED"))


################

class_per_storage$Storage<-factor(class_per_storage$Storage, levels = c("FROZEN", "FREEZEDRIED"))

ggplot(class_per_storage, aes(x =Year, y = Abundance, fill = Class_plot))+
  geom_col(position = "fill", stat="identity", size = 0)+ 
  ylab("Relative abundance")+
 facet_wrap(~Storage, ncol = 1, strip.position="right")+
  scale_fill_manual(values = rev(pal))+
  xlim(c(1996, 2020))+
 # theme(plot.margin=unit(c(0.2,0,0,0.1),"cm"))+
  labs(fill = "Bacterial class")+
  ggtitle("Yearly composition by storage method")

This chunk took 0.06 minutes

  • Time of day
# time time of day

class_per_storage$TimeCat <- factor(class_per_storage$TimeCat, levels = c( "MORNING", "AFTERNOON"))

ggplot(class_per_storage, aes(x =Year, y = Abundance, fill = Class_plot))+
  geom_col(position = "fill", stat="identity", size = 0)+ 
  ylab("Relative abundance")+
 facet_wrap(~TimeCat, ncol = 2)+
  scale_fill_manual(values = rev(pal))+
  xlim(c(1996, 2020))+
 # theme(plot.margin=unit(c(0.2,0,0,0.1),"cm"))+
  labs(fill = "Bacterial class")+
  ggtitle("Yearly composition by time of day")

This chunk took 0.04 minutes

  • Season
## season

season_ps_class<-tax_glom(meerkat_filtered, taxrank = "Class")

season_ps_class<-core(season_ps_class, detection = 10, prevalence = 0)

sample_data(season_ps_class)<-sample_data(season_ps_class)[,c("feature.id", "Season", "IndividID", "Year")]
class_per_season<-psmelt(season_ps_class)

class_per_season$Class_plot<-as.character(ifelse(class_per_season$Class %in% top_class, class_per_season$Class, "Other"))



class_per_season$Class_plot<-factor(class_per_season$Class_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli"))


################


ggplot(class_per_season, aes(x =Year, y = Abundance, fill = Class_plot))+
  geom_col(position = "fill", stat="identity", size = 0)+ 
  ylab("Relative abundance")+
 facet_wrap(~Season, ncol = 1, strip.position="right")+
  scale_fill_manual(values = rev(pal))+
  xlim(c(1996, 2020))+
 # theme(plot.margin=unit(c(0.2,0,0,0.1),"cm"))+
  labs(fill = "Bacterial class")+
  ggtitle("Yearly composition by season")

This chunk took 0.07 minutes

  • Social groups
metadata_groups<-data.frame(sample_data(meerkat_filtered))

group_freq<-data.frame(table(metadata_groups$GroupAtSampling))

group_freq<-group_freq %>% arrange(-Freq)
head(group_freq, 10)
top_groups<-group_freq$Var1[1:15]


group_ps_class<-tax_glom(meerkat_filtered, taxrank = "Class")

group_ps_class<-core(group_ps_class, detection = 10, prevalence = 0)

sample_data(group_ps_class)<-sample_data(group_ps_class)[,c("feature.id", "Storage", "IndividID", "Year", "SampleDate", "GroupAtSampling")]
class_per_storage<-psmelt(group_ps_class)

class_per_storage$Class_plot<-as.character(ifelse(class_per_storage$Class %in% top_class, class_per_storage$Class, "Other"))



class_per_storage$Class_plot<-factor(class_per_storage$Class_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli"))

class_per_storage$Group_plot<-ifelse(class_per_storage$GroupAtSampling %in% top_groups, as.character(class_per_storage$GroupAtSampling), "Other")



class_per_storage$Group_plot<-forcats::fct_reorder(class_per_storage$Group_plot, class_per_storage$Year, mean)

length(unique(subset(class_per_storage, Group_plot != "Other")$feature.id))
## [1] 1003
ggplot(subset(class_per_storage, Group_plot != "Other"), aes(x =Year, y = Abundance, fill = Class_plot))+
  geom_col(position = "fill", stat="identity", size = 0)+ 
  ylab("Realtive abundance")+
  scale_fill_manual(values = rev(pal))+
  facet_wrap(~Group_plot, ncol = 1, strip.position="right")+
  ggtitle("Yearly composition by major social groups")+
  scale_y_continuous(breaks = c(0.2, 0.4, 0.6, 0.8))+
  labs(fill = "Bacterial class")

This chunk took 0.06 minutes

8 Fig 3: Microbiome composition by year barplot

meerkat_merged_year<-merge_samples(meerkat_filtered,"Year", fun=mean)

sample_data(meerkat_merged_year)$Year<-sample_names(meerkat_merged_year)

meerkat_merged_year_class<-tax_glom(meerkat_merged_year, taxrank = "Class")

sample_data(meerkat_merged_year_class)<-sample_data(meerkat_merged_year_class)[,1]

#meerkat_merged_year_class<-microbiome::transform(meerkat_merged_year_class, "compositional") 
class_per_year<-psmelt(meerkat_merged_year_class)



class_per_year$Class_plot<-as.character(ifelse(class_per_year$Class %in% top_class, class_per_year$Class, "Other"))

class_per_year$Class_plot<-factor(class_per_year$Class_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli"))

class_per_year$Sample<- as.integer(class_per_year$Sample)

class_per_year<-class_per_year%>% group_by(Sample, Class_plot)%>%summarize(Abundance = sum(Abundance))
class_per_year<-data.frame(class_per_year)
class_per_year$Sample<-factor(class_per_year$Sample)


plot_mb_year<-ggplot(class_per_year, aes(x =Sample, y = Abundance, fill = Class_plot))+
  geom_bar( stat="identity",  size = 0, position = "fill")+ 
   theme_bw()+
  scale_fill_manual(values = rev(pal))+
  theme(legend.position = "right")+
  xlab("")+ 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.9, hjust=1))+
  labs(fill = "Bacterial class")+
   scale_y_continuous(expand = c(0,0))+
  xlab("Year")


year_freq<-data.frame(table(sample_data(meerkat_filtered)$Year))


year_hist<-ggplot(year_freq, aes(x = Var1, y = Freq)) + geom_col(fill = "lightgray", col = "black")+
# theme(plot.margin=unit(c(0.2,5,0.2,0.2),"cm"))+
  theme(axis.text.x = element_blank(), axis.title.x = element_blank())+
  theme(legend.position = "none")+
  scale_y_sqrt(breaks = c(0,20,50,100,200))+
  geom_hline(yintercept = 20, col = "black", linetype = "longdash")

year_plot<-ggarrange(year_hist, plot_mb_year, ncol = 1, align = "v", heights = c(1,7), legend = "none")+
     theme(plot.margin=unit(c(0.2,0.2,0.2,0.6),"cm"))

## tb

TB_summary_year2<-subset(TB_summary_year, TB_status == "TB diseased")



extra_rows<-data.frame(as.integer(1997), as.factor("TB diseased")   , as.integer(0) ,  as.integer(165) ,as.numeric(0) ,as.numeric(0))
names(extra_rows)<-names(TB_summary_year2)

extra_rows2<-data.frame(as.integer(1998), as.factor("TB diseased")   , as.integer(0) ,  as.integer(165) ,as.numeric(0) ,as.numeric(0))
names(extra_rows2)<-names(TB_summary_year2)


TB_summary_year3<-rbind(TB_summary_year2, extra_rows, extra_rows2)
TB_summary_year3$Year<-factor(TB_summary_year3$Year)
summary(TB_summary_year3)
##       Year           TB_status       freq           Total      
##  1997   : 1   TB unexposed: 0   Min.   : 0.00   Min.   :165.0  
##  1998   : 1   TB exposed  : 0   1st Qu.: 7.50   1st Qu.:284.5  
##  1999   : 1   TB diseased :23   Median :37.00   Median :356.0  
##  2000   : 1                     Mean   :35.48   Mean   :341.3  
##  2001   : 1                     3rd Qu.:59.00   3rd Qu.:416.5  
##  2002   : 1                     Max.   :81.00   Max.   :452.0  
##  (Other):17                                                    
##    Proportion            se          
##  Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.02382   1st Qu.:0.008278  
##  Median :0.09160   Median :0.014551  
##  Mean   :0.10353   Mean   :0.013787  
##  3rd Qu.:0.15807   3rd Qu.:0.018940  
##  Max.   :0.29412   Max.   :0.029535  
## 
TB_hist<-ggplot(TB_summary_year3, aes(y = Proportion, x = Year))+
  geom_col(fill = "brown2", col = "black")+
# theme(plot.margin=unit(c(0.2,5,0.2,0.2),"cm"))+
  theme(axis.text.x = element_blank(), axis.title.x = element_blank())+
  theme(legend.position = "none")

year_plot<-ggarrange(year_hist, TB_hist, plot_mb_year, ncol = 1, align = "v", heights = c(1,1, 5), legend = "none", labels = c("b)", "c)", "d)"), label.x = -0.07)+
     theme(plot.margin=unit(c(0.2,0.2,0.2,1),"cm"))

year_plot

This chunk took 0.03 minutes

9 Constrained ordination

  • Constrained ordination using CAP
  • Also test all supertypes
scaled_core<-microbiome::core(meerkat_filtered, detection = 100, prevalence = 0.01)

scaled_core<-tax_glom(meerkat_filtered, taxrank = "Genus")

scaled_core<-microbiome::core(scaled_core, detection = 10, prevalence = 0.005)

scaled_core
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 499 taxa and 1141 samples ]:
## sample_data() Sample Data:        [ 1141 samples by 32 sample variables ]:
## tax_table()   Taxonomy Table:     [ 499 taxa by 7 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 499 tips and 498 internal nodes ]:
## taxa are rows

This chunk took 0.05 minutes

  • organise variables
# otutable

otutable<-data.frame(t(scaled_core@otu_table@.Data))
dim(otutable)
## [1] 1141  499
otutable[1:5,1:5]
colnames(otutable)<-taxa_names(scaled_core)

metadata<-data.frame(sample_data(scaled_core))
names(metadata)
##  [1] "feature.id"          "IndividID"           "SampleDate"         
##  [4] "SampleTime"          "Storage"             "Seq_run"            
##  [7] "Imtechella"          "Allobacillus"        "Seq_depth"          
## [10] "scaling_factor"      "Seq_depth_nospikein" "BirthDate"          
## [13] "DeathDate"           "AgeAtSampling"       "GroupAtSampling"    
## [16] "Condition_weight"    "SocialStatus"        "Temp_max"           
## [19] "sum_rainfall_month"  "HoursAfterSunrise"   "TimeCat"            
## [22] "Year"                "month"               "Season"             
## [25] "TB_status"           "TB_exposure"         "TB_resistance"      
## [28] "TB_survival"         "TB_symptom_date"     "TB_exposure_date"   
## [31] "TB_stage"            "TotalAbundance"
## TB variables

TB_stage<-metadata$TB_stage
TB_status<-metadata$TB_status
TB_survival<-metadata$TB_survival


# condition and season

summary(metadata$Condition_weight)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -274.47  -57.95  -15.40  -14.00   28.04  245.68
metadata$Condition_cat <- ifelse(metadata$Condition_weight < summary(metadata$Condition_weight)[2], "BAD", "AVERAGE")
metadata$Condition_cat <- ifelse(metadata$Condition_weight > summary(metadata$Condition_weight)[5], "GOOD", metadata$Condition_cat)

condition<-metadata$Condition_cat
season <- metadata$Season

## rainfall

summary(log10(metadata$sum_rainfall_month+1))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.8062  0.7770  1.3385  2.2896
metadata$Rainfall_cat <- ifelse(metadata$sum_rainfall_month == summary(metadata$sum_rainfall_month)[2], "LOW", "AVERAGE")
metadata$Rainfall_cat <- ifelse(metadata$sum_rainfall_month > summary(metadata$sum_rainfall_month)[5], "HIGH", metadata$Rainfall_cat)
table(metadata$Rainfall_cat)
## 
## AVERAGE    HIGH     LOW 
##     541     281     319
# temp
hist(metadata$Temp_max)

summary(metadata$Temp_max)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.60   26.80   32.30   31.25   36.10   43.00
metadata$Max_temp_cat <- ifelse(metadata$Temp_max < summary(metadata$Temp_max)[2], "LOW", "AVERAGE")
metadata$Max_temp_cat <- ifelse(metadata$Temp_max > summary(metadata$Temp_max)[5], "HIGH", metadata$Max_temp_cat)
table(metadata$Max_temp_cat)
## 
## AVERAGE    HIGH     LOW 
##     577     283     281
## max temp

max_temp<-metadata$Max_temp_cat
#hist(max_temp)

# year

hist(metadata$Year)

metadata<-metadata %>% mutate(YearCat = case_when((Year <2005 ~ "Pre-2005"),
                                                  (Year >=2005 & Year < 2010) ~ "2005-2010",
                                                  (Year >=2010 & Year < 2015) ~ "2010-2015",
                                                  (Year >=2015) ~ "2015-2019"))
                                                  
  

YearCat<-as.character(metadata$YearCat)

### control variables
IndividID<-metadata$IndividID
time<-metadata$TimeCat
maxtemp<-metadata$Max_temp_cat
samplingtemp<-metadata$HourlyTemp
foraging<-log10(metadata$HoursSinceForagingStart+0.1)
rainfall<-metadata$Rainfall_cat
# methods
storage<-metadata$Storage
seq_depth<-as.numeric(scale(metadata$Seq_depth_nospikein))
seq_run<-metadata$Seq_run

This chunk took 0 minutes

  • Generate distance matrices
###### genus level 

wunifrac_dist<-distance(scaled_core, method = "wunifrac")

unifrac_dist<-distance(scaled_core, method = "unifrac")

bray_dist<-distance(scaled_core, method = "bray")


# asv level ###

scaled_core_asv<-microbiome::core(meerkat_filtered, detection = 100, prevalence = 0.005)

wunifrac_dist_asv<-distance(scaled_core_asv, method = "wunifrac")

unifrac_dist_asv<-distance(scaled_core_asv, method = "unifrac")

bray_dist_asv<-distance(scaled_core_asv, method = "bray")

This chunk took 18.43 minutes

9.0.1 Apply constrained ordination models (dbRDA)

# weighted unifrac

final_model<-capscale(wunifrac_dist ~ 
                       time + 
                        TB_stage+
                        condition+
                        rainfall+
                       YearCat+
                        storage+
                        seq_depth+
                       IndividID, 
                env = metadata, comm = otutable) 

# weighted unifrac

final_model_anova<-anova.cca(final_model, by="terms")
final_model_anova
final_model_anova_df<-data.frame(final_model_anova)
RsquareAdj(final_model)
## $r.squared
## [1] 0.4318683
## 
## $adj.r.squared
## [1] 0.2755368
# write.csv(final_model_anova_df, "dbrda_wunifrac_summary_genus.csv")



## unweighted unifrac

unifrac_model<-capscale(unifrac_dist ~ 
                       time + 
                        TB_stage+
                        condition+
                         rainfall+
                       YearCat+
                        storage+
                        seq_depth+
                       IndividID, 
                env = metadata, comm = otutable) 

## unweighted unifrac ##

unifrac_anova<-anova.cca(unifrac_model, by="terms")
unifrac_anova
unifrac_anova_df<-data.frame(unifrac_anova)
#write.csv(unifrac_anova_df, "dbrda_unifrac_summary_genus.csv")


bray_model<-capscale(bray_dist ~ 
                       time + 
                        TB_stage+
                        condition+
                          rainfall+
                       YearCat+
                        storage+
                        seq_depth+
                       IndividID, 
                env = metadata, comm = otutable) 

bray_anova<-anova.cca(bray_model, by="terms")
bray_anova
bray_anova_df<-data.frame(bray_anova)
#write.csv(bray_anova_df, "dbrda_bray_summary_genus.csv")


#### ASV level ########
#### ASV level ########
#### ASV level ########
#### ASV level ########
#### ASV level ########


# weighted unifrac

wunifrac_model_asv<-capscale(wunifrac_dist_asv ~ 
                       time + 
                        TB_stage+
                        condition+
                        rainfall+
                       YearCat+
                        storage+
                        seq_depth+
                       IndividID, 
                env = metadata, comm = otutable) 

# weighted unifrac

wunifrac_asv_anova<-anova.cca(wunifrac_model_asv, by="terms")
wunifrac_asv_anova
## unweighted unifrac

unifrac_model_asv<-capscale(unifrac_dist_asv ~ 
                       time + 
                        TB_stage+
                        condition+
                         rainfall+
                       YearCat+
                        storage+
                        seq_depth+
                       IndividID, 
                env = metadata, comm = otutable) 

## unweighted unifrac ##

unifrac_asv_anova<-anova.cca(unifrac_model_asv, by="terms")
unifrac_asv_anova
bray_model_asv<-capscale(bray_dist_asv ~ 
                       time + 
                        TB_stage+
                        condition+
                          rainfall+
                       YearCat+
                        storage+
                        seq_depth+
                       IndividID, 
                env = metadata, comm = otutable) 

bray_asv_anova<-anova.cca(bray_model_asv, by="terms")
bray_asv_anova

This chunk took 35.02 minutes

  • Get dominant classes per sample
##dominant taxa

dominant_phylo<-scaled_core
#dominant_phylo<-scaled_core_genus
sample_data(dominant_phylo)<-sample_data(dominant_phylo)[,"feature.id"]

# melt data
psmelt_df<-speedyseq::psmelt(dominant_phylo)
head(psmelt_df)
dominant_taxa_df<-psmelt_df %>% 
  group_by(feature.id) %>% 
  arrange(feature.id, -Abundance) %>% 
  dplyr::mutate(rank=rank(-Abundance))

head(dominant_taxa_df)
dominant_taxa_df<-subset(dominant_taxa_df, rank ==1)
dominant_taxa_df$Class<-factor(dominant_taxa_df$Class)

# add to metadata 
sample_data(scaled_core)$DomClass <-vlookup(sample_data(scaled_core)$feature.id, dominant_taxa_df, lookup_column = "Sample", result_column = "Class")

top_class
## [1] "Clostridia"          "Bacilli"             "Bacteroidia"        
## [4] "Coriobacteriia"      "Actinobacteria"      "Fusobacteriia"      
## [7] "Gammaproteobacteria"
sample_data(scaled_core)$DomClass_plot<-ifelse(sample_data(scaled_core)$DomClass %in% top_class, as.character(sample_data(scaled_core)$DomClass), "Other")
unique(sample_data(scaled_core)$DomClass_plot)
## [1] "Clostridia"          "Other"               "Bacilli"            
## [4] "Actinobacteria"      "Gammaproteobacteria" "Coriobacteriia"     
## [7] "Fusobacteriia"       "Bacteroidia"
#sample_data(scaled_core)$DomClass_plot<-factor(sample_data(scaled_core)$DomClass_plot, levels =c(top_class, "Other"))

sample_data(scaled_core)$DomClass_plot<-factor(sample_data(scaled_core)$DomClass_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
"Coriobacteriia","Actinobacteria",    "Bacilli"))

pal<-brewer.pal(10,"Paired")
pal<-c(pal, "darkgrey")
pal<-pal[c(2,1,11,4,3,6,9,10)]
pal<-rev(pal)

This chunk took 0.01 minutes

9.0.2 Oridination figure

######################################

## extract data from model

final_model_df<-scores(final_model)
str(final_model_df)
## List of 3
##  $ species  : num [1:499, 1:2] -2.34e-04 -1.57e-04 3.19e-06 1.82e-05 1.39e-04 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:499] "a29558bc215cfe6f913afafef02450b9" "de14e4fbd65f5ea24336f74fca93998d" "b1a40d7ca4ef789afb38c8de415893eb" "bd28d84ee5305bf27575469eb63ba948" ...
##   .. ..$ : chr [1:2] "CAP1" "CAP2"
##  $ sites    : num [1:1141, 1:2] -0.658 -0.164 -0.795 1.257 0.517 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:1141] "Feature1" "Feature3" "Feature5" "Feature7" ...
##   .. ..$ : chr [1:2] "CAP1" "CAP2"
##  $ centroids: num [1:252, 1:2] 0.5301 -0.271 -0.0647 0.0524 -0.0639 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:252] "timeAFTERNOON" "timeMORNING" "TB_stageUnexposed" "TB_stageExposed" ...
##   .. ..$ : chr [1:2] "CAP1" "CAP2"
##  - attr(*, "const")= num 17.6
# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)

vectors_df$feature.id<-row.names(vectors_df)

# merge with info on dominant family

sample_metadata<-data.frame(sample_data(scaled_core))[,c("feature.id", "DomClass_plot")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")



#### add arrows #########
#### add arrows #########
#### add arrows #########
#### add arrows #########

centroids_df<-data.frame(final_model_df$centroids)
centroids_df<-centroids_df[c(1:5,7:8,10:15),]
#centroids_df<-centroids_df[c(1,2,4,6:10,12:15),]
centroids_df$Label<-c("Afternoon", "Morning",  "TB unexposed", "TB exposed", "TB clinical signs", "Bad condition", "Good condition","High rainfall", "Low rainfall" , "2005-2010", "2010-2015", "2015-2019", "Pre-2005")

# scale so they are more plottable


centroids_df[1,1] <-centroids_df[1,1] *0.7
centroids_df[1,2] <-centroids_df[1,2] *0.7

centroids_df[13,1] <-centroids_df[13,1] *0.7
centroids_df[13,2] <-centroids_df[13,2] *0.7

centroids_df$CAP1<-centroids_df$CAP1 * 1.5
centroids_df$CAP2<-centroids_df$CAP2 * 1.5

ggplot(vectors_df, aes(x = CAP1, y = CAP2))+
  geom_point(aes(fill = DomClass_plot), pch = 21, size = 4)+
  scale_fill_manual(values = rev(pal))+
  theme_bw()+
  
  # add arrows
  
  geom_segment(data=centroids_df, aes(x = 0, y = 0, xend = CAP1*3, yend = CAP2*3),
    arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1,  col = "black")+
  ggrepel::geom_label_repel(data=centroids_df,  
    alpha = 0.9, col = "black", size = 3, fill = "yellow", 
    #hjust = c(0,1), 
    #vjust = c(1,1),
    aes(CAP1 *3, CAP2*3, label = Label))

### add species scores ###
### add species scores ###
### add species scores ###
### add species scores ###


species_scores<-data.frame(final_model_df$species)

hist(species_scores$CAP1, breaks = 50)

hist(species_scores$CAP2, breaks = 50)

summary(species_scores$CAP1)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -7.010728 -0.000455  0.000018 -0.028936  0.000136  0.110956
summary(species_scores$CAP2)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -2.8140492 -0.0010509 -0.0000452 -0.0242252  0.0000738  0.5422269
species_scores<-subset(species_scores, CAP1 < -1  | CAP2 > 0.2 | CAP2 < -0.2)

#species_scores$CAP1<-species_scores$CAP1*0.8
#species_scores$CAP2<-species_scores$CAP2*0.8



species_scores$ASV<-row.names(species_scores)

taxonomy<-data.frame(tax_table(scaled_core))
head(taxonomy)
taxonomy$ASV<-row.names(taxonomy)

species_scores<-merge(species_scores, taxonomy, by = "ASV")

subset(species_scores, Genus == "Lactococcus")
#################

species_scores[4,2]<-species_scores[4,2]*0.2
species_scores[4,3]<-species_scores[4,3]*0.2


species_scores[5,2]<-species_scores[5,2]*0.8
species_scores[5,3]<-species_scores[5,3]*0.8


species_scores<-species_scores[-6,]

species_scores<-subset(species_scores, 
                       Genus == "Clostridium sensu stricto 1" |
                         Genus == "Blautia" |
                         Genus == "Fusobacterium" |
                         Genus == "Bacteroides" |
                         Genus == "Phascolarctobacterium" |
                         Genus == "Lactococcus" |
                         Genus == "Paeniclostridium" |
                         Genus == "[Ruminococcus] torques group")

species_scores[6,3]<-species_scores[6,3]*0.8
species_scores[6,2]<-species_scores[6,2]*0.8

###################
###################
###################
###################


ord_plot<-ggplot(vectors_df, aes(x = CAP1, y = CAP2))+
  geom_point(aes(fill = DomClass_plot), pch = 21, size = 3)+
  scale_fill_manual(values = rev(pal))+
  theme_bw()+
  xlab("CAP1 (40 %)")+ylab("CAP2 (14%)")+
 guides(fill = guide_legend(override.aes = list(size = 7)))+
  labs(fill = "Bacterial class")+
  
  # add arrows
  
  geom_segment(data=centroids_df, aes(x = 0, y = 0, xend = CAP1*3, yend = CAP2*3),
    arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1,  col = "black")+
  ggrepel::geom_label_repel(data=centroids_df,  
    alpha = 0.9, col = "black", size = 3, fill = "yellow", 
    aes(CAP1 *3, CAP2*3, label = Label))+
  
  # add species
  geom_point(data = species_scores,  size = 4, col = "black", pch = 8, stroke = 0, alpha = 0)+
   ggrepel::geom_label_repel(data=species_scores,  
    alpha = 0.9, col = "black", size = 2.5, fill = "skyblue", 
    aes( label = Genus))



ggarrange(ord_plot,year_plot,  ncol = 2 , common.legend = T, legend = "right", widths = c(3.5,2), labels = c("a)", NA))

#ggsave("Figures/Fig3.pdf")


#Saving 11.6 x 5.91 in image

This chunk took 0.07 minutes

10 Model genus level changes

10.0.1 Model year as a smoothed continuous variable

Make an ‘other’ category for rare taxa

genus_ps<-tax_glom(meerkat_filtered, taxrank = "Genus")

genus_abundances_df<-data.frame(taxa_sums(genus_ps))

genus_abundances_df$ASV<-row.names(genus_abundances_df)

taxonomy<-data.frame(tax_table(genus_ps))
taxonomy$ASV<-row.names(taxonomy)


genus_abundances_df<-merge(genus_abundances_df, taxonomy, by = "ASV")
names(genus_abundances_df)[2]<-"Abundance"

genus_abundances_df<-genus_abundances_df %>% dplyr::arrange(-Abundance)

top_genera<- as.character(genus_abundances_df$ASV[1:30])

sample_data(genus_ps)<-sample_data(genus_ps)[,c( "feature.id")]

top_genera_melt<-psmelt(genus_ps)

# make a category for rare genera

top_genera_melt$Genus2<-ifelse(top_genera_melt$OTU %in% top_genera, top_genera_melt$Genus, "Rare taxa")


top_genera_melt2<-top_genera_melt %>% group_by(Sample, Genus2) %>% summarize(Abundance2 = sum(Abundance))


metadata<-data.frame(sample_data(meerkat_filtered))

top_genera_melt3<- merge(top_genera_melt2, metadata, by.x = "Sample", by.y = "feature.id")
names(top_genera_melt3)
##  [1] "Sample"              "Genus2"              "Abundance2"         
##  [4] "IndividID"           "SampleDate"          "SampleTime"         
##  [7] "Storage"             "Seq_run"             "Imtechella"         
## [10] "Allobacillus"        "Seq_depth"           "scaling_factor"     
## [13] "Seq_depth_nospikein" "BirthDate"           "DeathDate"          
## [16] "AgeAtSampling"       "GroupAtSampling"     "Condition_weight"   
## [19] "SocialStatus"        "Temp_max"            "sum_rainfall_month" 
## [22] "HoursAfterSunrise"   "TimeCat"             "Year"               
## [25] "month"               "Season"              "TB_status"          
## [28] "TB_exposure"         "TB_resistance"       "TB_survival"        
## [31] "TB_symptom_date"     "TB_exposure_date"    "TB_stage"           
## [34] "TotalAbundance"
head(top_genera_melt3)

This chunk took 0.11 minutes

# loop to fit GAMMs to each genus

year_estimates<-list()

taxanames<-unique(top_genera_melt3$Genus2)

for (i in 1:length(taxanames)){
  tryCatch({ #catch errors
    print(i)
    print(taxanames[i])
    
    taxa1<-subset(top_genera_melt3, Genus2 == taxanames[i])
    # taxa1<-subset(top_genera_melt3, Genus2 == "Rare taxa")
    
    metadata<-taxa1
    
    # hist(log10(metadata$AgeAtSampling))
    # hist(log10(metadata$TotalAbundance))
    # hist(metadata$Seq_depth_nospikein)
    
    # scale variables
    
    metadata$AgeAtSampling<-as.numeric(scale(log10(metadata$AgeAtSampling)))
    metadata$TotalAbundance<-as.numeric(scale(log10(metadata$TotalAbundance)))
    metadata$Seq_depth_nospikein<-as.numeric(scale(log10(metadata$Seq_depth_nospikein)))
    
    
    # fit gamm    
    m_taxa <- mgcv::bam(log10(Abundance2+1)~
                          s(HoursAfterSunrise, bs = "cr")+
                          s(month, bs = "cc")+
                          s(AgeAtSampling, bs = "cr")+
                          s(Seq_depth_nospikein, bs = "cr")+
                          s(Year, bs = "cr", k = 5)+
                          s(TotalAbundance, bs = "cr")+
                          Storage+
                          Seq_run+
                          s(IndividID, bs = "re")+
                          s(GroupAtSampling, bs = "re"),
                        #correlation = corARMA(form = ~ 1|Year, p = 3),
                        data=metadata,
                        family = gaussian)
    
    print(summary(m_taxa))
    gam.check(m_taxa)
    
    
    
    confint_df_year<-stats::confint(m_taxa, parm = "s(Year)",  type = "confidence")
    confint_df_year$Taxa<-taxanames[i]
    confint_df<-confint_df_year
    
    # convert into real estimate by adding the intercapt (= mean abundance)
    
    confint_df$est_real<-confint_df$est+summary(m_taxa)$p.coeff[1] 
    confint_df$lower_real<- confint_df$lower+summary(m_taxa)$p.coeff[1] 
    confint_df$upper_real<- confint_df$upper+summary(m_taxa)$p.coeff[1] 
    
    ## add effect size and p value
    
    summary<-summary(m_taxa)
    confint_df$effect_size<-  summary$s.table[5,3]
    confint_df$P_val<-  summary$s.table[5,4]
    
    year_estimates[[i]]<-confint_df
    
    
    
  }, error=function(e){})
}
## [1] 1
## [1] "[Ruminococcus] torques group"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.16009    0.05302  59.603   <2e-16 ***
## StorageFROZEN -0.14738    0.07757  -1.900   0.0577 .  
## Seq_runRUN2    0.05483    0.06743   0.813   0.4163    
## Seq_runRUN3    0.14268    0.07035   2.028   0.0428 *  
## Seq_runRUN4    0.15657    0.15048   1.040   0.2984    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df       F  p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000   3.442  0.06382 .  
## s(month)               3.727e+00   8.000   1.192  0.02775 *  
## s(AgeAtSampling)       2.644e+00   3.289   7.748 2.73e-05 ***
## s(Seq_depth_nospikein) 2.638e+00   3.373   1.592  0.24084    
## s(Year)                2.234e+00   2.720   4.969  0.00451 ** 
## s(TotalAbundance)      4.237e+00   5.159 147.411  < 2e-16 ***
## s(IndividID)           1.134e+01 234.000   0.052  0.22592    
## s(GroupAtSampling)     1.037e-04  41.000   0.000  0.46613    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.619   Deviance explained =   63%
## fREML = 1255.3  Scale est. = 0.5038    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-2.213887e-06,5.168926e-06]
## (score 1255.34 & scale 0.5037998).
## Hessian positive definite, eigenvalue range [2.131302e-06,565.5707].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value    
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    0.93   0.015 *  
## s(month)               8.00e+00 3.73e+00    0.98   0.205    
## s(AgeAtSampling)       9.00e+00 2.64e+00    1.01   0.545    
## s(Seq_depth_nospikein) 9.00e+00 2.64e+00    0.97   0.105    
## s(Year)                4.00e+00 2.23e+00    0.97   0.170    
## s(TotalAbundance)      9.00e+00 4.24e+00    0.93  <2e-16 ***
## s(IndividID)           2.35e+02 1.13e+01      NA      NA    
## s(GroupAtSampling)     4.20e+01 1.04e-04      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 2
## [1] "Alloprevotella"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.84199    0.09407   8.951   <2e-16 ***
## StorageFROZEN  0.05472    0.11404   0.480    0.631    
## Seq_runRUN2    0.11416    0.11538   0.989    0.323    
## Seq_runRUN3   -0.05323    0.11832  -0.450    0.653    
## Seq_runRUN4   -0.08494    0.18080  -0.470    0.639    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                             edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)    3.01031   3.649  4.022  0.00636 ** 
## s(month)                0.06256   8.000  0.008  0.34233    
## s(AgeAtSampling)        3.74623   4.532  8.324 6.83e-07 ***
## s(Seq_depth_nospikein)  1.00002   1.000  5.076  0.02446 *  
## s(Year)                 2.65771   3.128 10.148 1.50e-06 ***
## s(TotalAbundance)       4.37383   5.277 40.765  < 2e-16 ***
## s(IndividID)           59.05731 234.000  0.408 1.79e-05 ***
## s(GroupAtSampling)      7.68720  41.000  0.573  0.00444 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.428   Deviance explained = 47.1%
## fREML = 1635.1  Scale est. = 0.9264    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-5.348667e-06,7.043528e-06]
## (score 1635.138 & scale 0.9264015).
## Hessian positive definite, eigenvalue range [5.34862e-06,567.1029].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value    
## s(HoursAfterSunrise)     9.0000   3.0103    0.98    0.28    
## s(month)                 8.0000   0.0626    0.97    0.20    
## s(AgeAtSampling)         9.0000   3.7462    0.97    0.14    
## s(Seq_depth_nospikein)   9.0000   1.0000    0.93  <2e-16 ***
## s(Year)                  4.0000   2.6577    0.97    0.09 .  
## s(TotalAbundance)        9.0000   4.3738    1.01    0.65    
## s(IndividID)           235.0000  59.0573      NA      NA    
## s(GroupAtSampling)      42.0000   7.6872      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 3
## [1] "Bacillus"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.14964    0.09511  12.088   <2e-16 ***
## StorageFROZEN -0.16268    0.12760  -1.275    0.203    
## Seq_runRUN2    0.01438    0.11933   0.120    0.904    
## Seq_runRUN3   -0.15825    0.12391  -1.277    0.202    
## Seq_runRUN4   -0.24584    0.21424  -1.147    0.251    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df      F p-value    
## s(HoursAfterSunrise)    4.097   5.004 11.269 < 2e-16 ***
## s(month)                5.657   8.000  9.546 < 2e-16 ***
## s(AgeAtSampling)        2.655   3.288  3.452 0.01381 *  
## s(Seq_depth_nospikein)  1.000   1.000  4.438 0.03537 *  
## s(Year)                 1.467   1.750  5.229 0.00649 ** 
## s(TotalAbundance)       1.000   1.000  0.128 0.72048    
## s(IndividID)           23.244 234.000  0.115 0.10057    
## s(GroupAtSampling)      7.010  41.000  0.338 0.02286 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.175   Deviance explained = 21.1%
## fREML =   1828  Scale est. = 1.3538    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-7.876963e-06,3.826142e-06]
## (score 1828.015 & scale 1.353826).
## Hessian positive definite, eigenvalue range [1.562667e-06,565.7809].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value    
## s(HoursAfterSunrise)     9.00   4.10    1.03   0.830    
## s(month)                 8.00   5.66    0.93   0.005 ** 
## s(AgeAtSampling)         9.00   2.65    0.96   0.075 .  
## s(Seq_depth_nospikein)   9.00   1.00    0.97   0.240    
## s(Year)                  4.00   1.47    0.88  <2e-16 ***
## s(TotalAbundance)        9.00   1.00    1.02   0.715    
## s(IndividID)           235.00  23.24      NA      NA    
## s(GroupAtSampling)      42.00   7.01      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 4
## [1] "Bacteroides"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.21226    0.07292  30.338   <2e-16 ***
## StorageFROZEN -0.02333    0.11166  -0.209   0.8345    
## Seq_runRUN2    0.19027    0.09276   2.051   0.0405 *  
## Seq_runRUN3    0.23291    0.09474   2.458   0.0141 *  
## Seq_runRUN4    0.28937    0.16815   1.721   0.0855 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df       F p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000   8.821 0.00304 ** 
## s(month)               5.725e-01   8.000   0.090 0.27571    
## s(AgeAtSampling)       1.658e+00   2.070   1.307 0.28459    
## s(Seq_depth_nospikein) 1.000e+00   1.000   2.011 0.15648    
## s(Year)                2.672e+00   3.210  15.806 < 2e-16 ***
## s(TotalAbundance)      4.422e+00   5.363 123.872 < 2e-16 ***
## s(IndividID)           3.463e+00 234.000   0.015 0.38893    
## s(GroupAtSampling)     9.076e-06  41.000   0.000 0.88534    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.583   Deviance explained = 58.9%
## fREML = 1648.9  Scale est. = 1.028     n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-1.077949e-05,1.945813e-05]
## (score 1648.874 & scale 1.027997).
## Hessian positive definite, eigenvalue range [1.890004e-06,565.512].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value  
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    0.97    0.20  
## s(month)               8.00e+00 5.73e-01    0.97    0.12  
## s(AgeAtSampling)       9.00e+00 1.66e+00    1.00    0.48  
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    0.96    0.08 .
## s(Year)                4.00e+00 2.67e+00    0.95    0.06 .
## s(TotalAbundance)      9.00e+00 4.42e+00    0.96    0.14  
## s(IndividID)           2.35e+02 3.46e+00      NA      NA  
## s(GroupAtSampling)     4.20e+01 9.08e-06      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 5
## [1] "Blautia"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.38695    0.05409  62.622  < 2e-16 ***
## StorageFROZEN -0.33270    0.07220  -4.608 4.55e-06 ***
## Seq_runRUN2    0.05496    0.06701   0.820   0.4123    
## Seq_runRUN3    0.12524    0.06949   1.802   0.0718 .  
## Seq_runRUN4    0.05389    0.14904   0.362   0.7177    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df       F p-value    
## s(HoursAfterSunrise)    1.000   1.000   0.005 0.94171    
## s(month)                2.789   8.000   1.931 0.00068 ***
## s(AgeAtSampling)        2.061   2.574   1.072 0.46836    
## s(Seq_depth_nospikein)  3.200   4.010   2.410 0.04721 *  
## s(Year)                 1.000   1.000   0.044 0.83322    
## s(TotalAbundance)       1.103   1.196 568.128 < 2e-16 ***
## s(IndividID)           17.424 234.000   0.084 0.14665    
## s(GroupAtSampling)      8.223  41.000   0.431 0.00848 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.579   Deviance explained = 59.4%
## fREML = 1193.4  Scale est. = 0.44926   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-6.576772e-06,1.473204e-05]
## (score 1193.422 & scale 0.4492574).
## Hessian positive definite, eigenvalue range [1.620849e-06,565.6705].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value   
## s(HoursAfterSunrise)     9.00   1.00    0.99   0.350   
## s(month)                 8.00   2.79    0.94   0.045 * 
## s(AgeAtSampling)         9.00   2.06    1.00   0.480   
## s(Seq_depth_nospikein)   9.00   3.20    1.00   0.470   
## s(Year)                  4.00   1.00    0.96   0.060 . 
## s(TotalAbundance)        9.00   1.10    0.93   0.005 **
## s(IndividID)           235.00  17.42      NA      NA   
## s(GroupAtSampling)      42.00   8.22      NA      NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 6
## [1] "Catenisphaera"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.06294    0.06581  31.348  < 2e-16 ***
## StorageFROZEN -0.14159    0.08832  -1.603  0.10920    
## Seq_runRUN2    0.07932    0.08552   0.928  0.35387    
## Seq_runRUN3    0.26400    0.08928   2.957  0.00318 ** 
## Seq_runRUN4    0.35565    0.14159   2.512  0.01216 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000  0.040 0.840699    
## s(month)               1.304e+00   8.000  0.306 0.146684    
## s(AgeAtSampling)       5.431e+00   6.365 12.103  < 2e-16 ***
## s(Seq_depth_nospikein) 1.000e+00   1.000  0.084 0.771489    
## s(Year)                1.527e+00   1.829  0.800 0.343250    
## s(TotalAbundance)      3.946e+00   4.820 40.263  < 2e-16 ***
## s(IndividID)           4.490e+01 234.000  0.269 0.000905 ***
## s(GroupAtSampling)     3.758e-05  41.000  0.000 0.608065    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.309   Deviance explained = 34.7%
## fREML = 1413.4  Scale est. = 0.6442    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-3.093541e-06,1.251552e-05]
## (score 1413.437 & scale 0.6442027).
## Hessian positive definite, eigenvalue range [1.990588e-06,566.4137].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value  
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    1.00   0.415  
## s(month)               8.00e+00 1.30e+00    0.96   0.095 .
## s(AgeAtSampling)       9.00e+00 5.43e+00    1.01   0.610  
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    0.99   0.435  
## s(Year)                4.00e+00 1.53e+00    0.96   0.045 *
## s(TotalAbundance)      9.00e+00 3.95e+00    0.96   0.075 .
## s(IndividID)           2.35e+02 4.49e+01      NA      NA  
## s(GroupAtSampling)     4.20e+01 3.76e-05      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 7
## [1] "Clostridium sensu stricto 1"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.83482    0.05499  69.733  < 2e-16 ***
## StorageFROZEN  0.03648    0.08352   0.437 0.662328    
## Seq_runRUN2   -0.27453    0.06910  -3.973 7.56e-05 ***
## Seq_runRUN3   -0.24930    0.07080  -3.521 0.000447 ***
## Seq_runRUN4   -0.40271    0.18070  -2.229 0.026043 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                          edf  Ref.df       F p-value    
## s(HoursAfterSunrise)   4.942   5.967  20.996  <2e-16 ***
## s(month)               1.991   8.000   0.902  0.0121 *  
## s(AgeAtSampling)       1.000   1.000   0.012  0.9117    
## s(Seq_depth_nospikein) 3.484   4.336   2.864  0.0238 *  
## s(Year)                2.232   2.722   2.222  0.1183    
## s(TotalAbundance)      5.816   6.763 133.951  <2e-16 ***
## s(IndividID)           5.572 234.000   0.025  0.3659    
## s(GroupAtSampling)     1.584  41.000   0.045  0.2788    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.712   Deviance explained =   72%
## fREML = 1347.3  Scale est. = 0.58937   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 10 iterations.
## Gradient range [-3.404026e-06,6.68899e-06]
## (score 1347.279 & scale 0.5893748).
## Hessian positive definite, eigenvalue range [3.403997e-06,565.5371].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value  
## s(HoursAfterSunrise)     9.00   4.94    1.01   0.625  
## s(month)                 8.00   1.99    0.94   0.020 *
## s(AgeAtSampling)         9.00   1.00    1.03   0.840  
## s(Seq_depth_nospikein)   9.00   3.48    1.01   0.655  
## s(Year)                  4.00   2.23    0.95   0.035 *
## s(TotalAbundance)        9.00   5.82    0.99   0.335  
## s(IndividID)           235.00   5.57      NA      NA  
## s(GroupAtSampling)      42.00   1.58      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 8
## [1] "Collinsella"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.28429    0.07845  29.117  < 2e-16 ***
## StorageFROZEN -0.34675    0.09940  -3.488 0.000505 ***
## Seq_runRUN2    0.48959    0.09173   5.337 1.15e-07 ***
## Seq_runRUN3    0.73036    0.09546   7.651 4.33e-14 ***
## Seq_runRUN4    0.90716    0.18257   4.969 7.80e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)    3.0061   3.689  1.327  0.22705    
## s(month)                3.1633   8.000  2.602 5.55e-05 ***
## s(AgeAtSampling)        2.7874   3.463  1.662  0.16798    
## s(Seq_depth_nospikein)  1.6920   2.161  0.086  0.91239    
## s(Year)                 1.0000   1.000  9.332  0.00231 ** 
## s(TotalAbundance)       5.0041   5.966 35.716  < 2e-16 ***
## s(IndividID)            0.3996 234.000  0.002  0.46454    
## s(GroupAtSampling)     13.0765  41.000  0.945 7.12e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.309   Deviance explained =   33%
## fREML = 1560.1  Scale est. = 0.85559   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-3.06861e-06,4.964849e-07]
## (score 1560.124 & scale 0.8555855).
## Hessian positive definite, eigenvalue range [3.068596e-06,565.5911].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value    
## s(HoursAfterSunrise)     9.00   3.01    1.03    0.83    
## s(month)                 8.00   3.16    0.89  <2e-16 ***
## s(AgeAtSampling)         9.00   2.79    0.97    0.13    
## s(Seq_depth_nospikein)   9.00   1.69    1.00    0.42    
## s(Year)                  4.00   1.00    0.90  <2e-16 ***
## s(TotalAbundance)        9.00   5.00    0.95    0.05 *  
## s(IndividID)           235.00   0.40      NA      NA    
## s(GroupAtSampling)      42.00  13.08      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 9
## [1] "Enterococcus"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.278504   0.059169  55.409  < 2e-16 ***
## StorageFROZEN -0.285034   0.079082  -3.604 0.000327 ***
## Seq_runRUN2   -0.024265   0.075947  -0.319 0.749415    
## Seq_runRUN3    0.019965   0.078801   0.253 0.800036    
## Seq_runRUN4   -0.006933   0.130806  -0.053 0.957739    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df       F  p-value    
## s(HoursAfterSunrise)    3.023   3.694  11.164  < 2e-16 ***
## s(month)                2.814   8.000   1.937 0.000509 ***
## s(AgeAtSampling)        3.109   3.818   8.445 2.38e-06 ***
## s(Seq_depth_nospikein)  1.000   1.000   0.277 0.598937    
## s(Year)                 1.000   1.000  11.925 0.000575 ***
## s(TotalAbundance)       3.255   4.037 126.241  < 2e-16 ***
## s(IndividID)           33.616 234.000   0.180 0.025853 *  
## s(GroupAtSampling)      4.922  41.000   0.178 0.170204    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.474   Deviance explained =   50%
## fREML = 1294.1  Scale est. = 0.5268    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-4.166439e-06,3.280395e-06]
## (score 1294.116 & scale 0.5268041).
## Hessian positive definite, eigenvalue range [1.469094e-06,566.0221].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value  
## s(HoursAfterSunrise)     9.00   3.02    1.02   0.755  
## s(month)                 8.00   2.81    0.99   0.410  
## s(AgeAtSampling)         9.00   3.11    1.01   0.630  
## s(Seq_depth_nospikein)   9.00   1.00    0.98   0.205  
## s(Year)                  4.00   1.00    0.96   0.085 .
## s(TotalAbundance)        9.00   3.25    1.00   0.465  
## s(IndividID)           235.00  33.62      NA      NA  
## s(GroupAtSampling)      42.00   4.92      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 10
## [1] "Escherichia-Shigella"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.15855    0.11140  10.400   <2e-16 ***
## StorageFROZEN -0.13684    0.13304  -1.029    0.304    
## Seq_runRUN2    0.04999    0.12218   0.409    0.683    
## Seq_runRUN3    0.08094    0.12406   0.652    0.514    
## Seq_runRUN4    0.18356    0.21418   0.857    0.392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df      F p-value    
## s(HoursAfterSunrise)    2.859   3.492  3.921 0.00803 ** 
## s(month)                1.372   8.000  0.369 0.10559    
## s(AgeAtSampling)        1.000   1.000  9.195 0.00248 ** 
## s(Seq_depth_nospikein)  1.000   1.000  4.445 0.03523 *  
## s(Year)                 1.257   1.442  2.870 0.13666    
## s(TotalAbundance)       3.938   4.824 32.140 < 2e-16 ***
## s(IndividID)           13.125 234.000  0.063 0.19657    
## s(GroupAtSampling)     16.766  41.000  1.902 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.291   Deviance explained = 31.9%
## fREML = 1864.5  Scale est. = 1.4531    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-4.267278e-06,3.164214e-06]
## (score 1864.493 & scale 1.453113).
## Hessian positive definite, eigenvalue range [2.358056e-06,565.7079].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value    
## s(HoursAfterSunrise)     9.00   2.86    0.99    0.26    
## s(month)                 8.00   1.37    0.91  <2e-16 ***
## s(AgeAtSampling)         9.00   1.00    0.98    0.25    
## s(Seq_depth_nospikein)   9.00   1.00    0.97    0.16    
## s(Year)                  4.00   1.26    0.92  <2e-16 ***
## s(TotalAbundance)        9.00   3.94    1.02    0.67    
## s(IndividID)           235.00  13.12      NA      NA    
## s(GroupAtSampling)      42.00  16.77      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 11
## [1] "Fusobacterium"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.913430   0.078812  24.278   <2e-16 ***
## StorageFROZEN  0.270403   0.109012   2.480   0.0133 *  
## Seq_runRUN2    0.101523   0.099840   1.017   0.3094    
## Seq_runRUN3    0.181202   0.102646   1.765   0.0778 .  
## Seq_runRUN4   -0.008187   0.174012  -0.047   0.9625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df      F p-value    
## s(HoursAfterSunrise)   1.000e+00   1.00  0.667  0.4143    
## s(month)               3.980e-06   8.00  0.000  0.4522    
## s(AgeAtSampling)       1.466e+00   1.79  2.026  0.2189    
## s(Seq_depth_nospikein) 1.000e+00   1.00  3.565  0.0593 .  
## s(Year)                1.000e+00   1.00 44.880  <2e-16 ***
## s(TotalAbundance)      4.553e+00   5.49 85.231  <2e-16 ***
## s(IndividID)           1.819e+01 234.00  0.090  0.1073    
## s(GroupAtSampling)     7.247e+00  41.00  0.339  0.0249 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.489   Deviance explained = 50.6%
## fREML = 1668.4  Scale est. = 1.0436    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-4.28469e-06,5.70973e-06]
## (score 1668.442 & scale 1.04357).
## Hessian positive definite, eigenvalue range [3.798488e-07,565.6756].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value    
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    0.98    0.22    
## s(month)               8.00e+00 3.98e-06    0.90  <2e-16 ***
## s(AgeAtSampling)       9.00e+00 1.47e+00    0.98    0.20    
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    0.98    0.24    
## s(Year)                4.00e+00 1.00e+00    0.89  <2e-16 ***
## s(TotalAbundance)      9.00e+00 4.55e+00    0.97    0.16    
## s(IndividID)           2.35e+02 1.82e+01      NA      NA    
## s(GroupAtSampling)     4.20e+01 7.25e+00      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 12
## [1] "Geodermatophilus"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.180445   0.087791  24.837  < 2e-16 ***
## StorageFROZEN -0.295817   0.094428  -3.133  0.00178 ** 
## Seq_runRUN2    0.018892   0.088097   0.214  0.83024    
## Seq_runRUN3    0.001402   0.090936   0.015  0.98771    
## Seq_runRUN4    0.152551   0.198734   0.768  0.44288    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df     F  p-value    
## s(HoursAfterSunrise)    3.508   4.278 4.803 0.000719 ***
## s(month)                1.729   8.000 0.627 0.047863 *  
## s(AgeAtSampling)        1.755   2.189 1.364 0.253926    
## s(Seq_depth_nospikein)  3.526   4.373 1.668 0.143022    
## s(Year)                 1.726   2.104 0.388 0.624677    
## s(TotalAbundance)       3.711   4.573 5.993 4.16e-05 ***
## s(IndividID)            8.983 234.000 0.041 0.278556    
## s(GroupAtSampling)     20.209  41.000 2.426  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.142   Deviance explained = 17.9%
## fREML = 1466.2  Scale est. = 0.70983   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 12 iterations.
## Gradient range [-6.279772e-07,3.761228e-06]
## (score 1466.224 & scale 0.7098257).
## Hessian positive definite, eigenvalue range [0.09038299,565.7287].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value    
## s(HoursAfterSunrise)     9.00   3.51    0.98   0.205    
## s(month)                 8.00   1.73    0.93   0.005 ** 
## s(AgeAtSampling)         9.00   1.75    0.99   0.290    
## s(Seq_depth_nospikein)   9.00   3.53    1.03   0.845    
## s(Year)                  4.00   1.73    0.91  <2e-16 ***
## s(TotalAbundance)        9.00   3.71    1.02   0.720    
## s(IndividID)           235.00   8.98      NA      NA    
## s(GroupAtSampling)      42.00  20.21      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 13
## [1] "Helicobacter"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.27899    0.06774  18.881   <2e-16 ***
## StorageFROZEN  0.09823    0.09459   1.038    0.299    
## Seq_runRUN2    0.02925    0.08574   0.341    0.733    
## Seq_runRUN3   -0.03289    0.08707  -0.378    0.706    
## Seq_runRUN4    0.10232    0.15224   0.672    0.502    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)    3.3386   4.054  6.999 1.46e-05 ***
## s(month)                0.8524   8.000  0.166  0.19873    
## s(AgeAtSampling)        1.0000   1.000 10.103  0.00152 ** 
## s(Seq_depth_nospikein)  1.0000   1.000  2.039  0.15364    
## s(Year)                 1.9995   2.417 13.132 6.28e-07 ***
## s(TotalAbundance)       4.1493   5.046 77.050  < 2e-16 ***
## s(IndividID)           37.3367 234.000  0.214  0.00509 ** 
## s(GroupAtSampling)      4.0853  41.000  0.152  0.14830    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.509   Deviance explained = 53.4%
## fREML = 1467.6  Scale est. = 0.71607   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-6.201009e-06,6.024533e-06]
## (score 1467.63 & scale 0.7160725).
## Hessian positive definite, eigenvalue range [3.802967e-06,566.1358].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                             k'     edf k-index p-value
## s(HoursAfterSunrise)     9.000   3.339    0.99    0.40
## s(month)                 8.000   0.852    0.99    0.41
## s(AgeAtSampling)         9.000   1.000    1.00    0.46
## s(Seq_depth_nospikein)   9.000   1.000    0.98    0.21
## s(Year)                  4.000   2.000    1.00    0.52
## s(TotalAbundance)        9.000   4.149    1.02    0.74
## s(IndividID)           235.000  37.337      NA      NA
## s(GroupAtSampling)      42.000   4.085      NA      NA
## [1] 14
## [1] "Lachnoclostridium"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.93416    0.05215  56.263   <2e-16 ***
## StorageFROZEN -0.04383    0.06739  -0.650    0.516    
## Seq_runRUN2    0.08676    0.06295   1.378    0.168    
## Seq_runRUN3    0.10591    0.06575   1.611    0.108    
## Seq_runRUN4   -0.04891    0.13412  -0.365    0.715    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df       F  p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000   1.472 0.225288    
## s(month)               7.714e-05   8.000   0.000 0.389100    
## s(AgeAtSampling)       3.088e+00   3.807  23.713  < 2e-16 ***
## s(Seq_depth_nospikein) 2.736e+00   3.484   1.650 0.215014    
## s(Year)                1.516e+00   1.825   2.829 0.126870    
## s(TotalAbundance)      1.885e+00   2.374 227.034  < 2e-16 ***
## s(IndividID)           1.024e+01 234.000   0.047 0.260105    
## s(GroupAtSampling)     1.033e+01  41.000   0.620 0.000855 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.528   Deviance explained = 54.3%
## fREML = 1109.9  Scale est. = 0.38998   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-2.120786e-06,3.035296e-06]
## (score 1109.937 & scale 0.3899831).
## Hessian positive definite, eigenvalue range [1.576529e-06,565.5975].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value  
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    0.97   0.180  
## s(month)               8.00e+00 7.71e-05    1.03   0.770  
## s(AgeAtSampling)       9.00e+00 3.09e+00    1.03   0.850  
## s(Seq_depth_nospikein) 9.00e+00 2.74e+00    0.97   0.105  
## s(Year)                4.00e+00 1.52e+00    1.05   0.965  
## s(TotalAbundance)      9.00e+00 1.88e+00    0.94   0.035 *
## s(IndividID)           2.35e+02 1.02e+01      NA      NA  
## s(GroupAtSampling)     4.20e+01 1.03e+01      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 15
## [1] "Lachnospiraceae NK4A136 group"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.20101    0.07478  16.061  < 2e-16 ***
## StorageFROZEN -0.06190    0.10840  -0.571  0.56809    
## Seq_runRUN2    0.26240    0.09849   2.664  0.00783 ** 
## Seq_runRUN3    0.30582    0.10300   2.969  0.00305 ** 
## Seq_runRUN4    0.04675    0.17507   0.267  0.78948    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf  Ref.df      F p-value    
## s(HoursAfterSunrise)    1.0000   1.000  0.767 0.38130    
## s(month)                1.0440   8.000  0.219 0.17638    
## s(AgeAtSampling)        2.9452   3.642  3.330 0.01309 *  
## s(Seq_depth_nospikein)  1.0000   1.000  7.796 0.00533 ** 
## s(Year)                 1.0000   1.000  9.643 0.00195 ** 
## s(TotalAbundance)       4.1793   5.092 55.161 < 2e-16 ***
## s(IndividID)           16.5895 234.000  0.080 0.14583    
## s(GroupAtSampling)      0.5576  41.000  0.014 0.38056    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.382   Deviance explained =   40%
## fREML = 1676.5  Scale est. = 1.0661    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-5.099451e-06,1.992226e-06]
## (score 1676.5 & scale 1.066101).
## Hessian positive definite, eigenvalue range [1.620128e-06,565.6286].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                             k'     edf k-index p-value    
## s(HoursAfterSunrise)     9.000   1.000    0.96   0.085 .  
## s(month)                 8.000   1.044    0.91   0.005 ** 
## s(AgeAtSampling)         9.000   2.945    1.03   0.820    
## s(Seq_depth_nospikein)   9.000   1.000    0.96   0.110    
## s(Year)                  4.000   1.000    0.91  <2e-16 ***
## s(TotalAbundance)        9.000   4.179    1.03   0.800    
## s(IndividID)           235.000  16.590      NA      NA    
## s(GroupAtSampling)      42.000   0.558      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 16
## [1] "Lactococcus"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.89378    0.11357  16.675  < 2e-16 ***
## StorageFROZEN -0.42105    0.15040  -2.799  0.00521 ** 
## Seq_runRUN2    0.03619    0.12877   0.281  0.77870    
## Seq_runRUN3   -0.06716    0.12896  -0.521  0.60260    
## Seq_runRUN4   -0.06003    0.23638  -0.254  0.79958    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000  1.994 0.158157    
## s(month)               4.166e+00   8.000  2.890 0.000109 ***
## s(AgeAtSampling)       1.432e+00   1.746 16.129  2.4e-05 ***
## s(Seq_depth_nospikein) 1.370e+00   1.667  0.147 0.830767    
## s(Year)                3.373e+00   3.776 11.309  < 2e-16 ***
## s(TotalAbundance)      3.119e+00   3.894 13.486  < 2e-16 ***
## s(IndividID)           1.249e-04 234.000  0.000 0.628088    
## s(GroupAtSampling)     1.305e+01  41.000  0.788 0.000170 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.177   Deviance explained =   20%
## fREML = 1933.8  Scale est. = 1.6628    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 21 iterations.
## Gradient range [-2.868307e-06,2.005942e-05]
## (score 1933.837 & scale 1.662795).
## Hessian positive definite, eigenvalue range [2.476436e-06,565.588].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value   
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    1.03   0.845   
## s(month)               8.00e+00 4.17e+00    0.94   0.010 **
## s(AgeAtSampling)       9.00e+00 1.43e+00    0.97   0.125   
## s(Seq_depth_nospikein) 9.00e+00 1.37e+00    1.04   0.915   
## s(Year)                4.00e+00 3.37e+00    0.94   0.015 * 
## s(TotalAbundance)      9.00e+00 3.12e+00    1.05   0.965   
## s(IndividID)           2.35e+02 1.25e-04      NA      NA   
## s(GroupAtSampling)     4.20e+01 1.30e+01      NA      NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 17
## [1] "Marvinbryantia"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.13559    0.08924  23.932   <2e-16 ***
## StorageFROZEN -0.25773    0.10691  -2.411   0.0161 *  
## Seq_runRUN2    0.05836    0.10226   0.571   0.5684    
## Seq_runRUN3    0.22162    0.10575   2.096   0.0363 *  
## Seq_runRUN4   -0.05776    0.17452  -0.331   0.7407    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000 16.195 6.18e-05 ***
## s(month)               2.224e-05   8.000  0.000   0.8738    
## s(AgeAtSampling)       3.551e+00   4.332 18.484  < 2e-16 ***
## s(Seq_depth_nospikein) 1.293e+00   1.534  2.438   0.1811    
## s(Year)                2.300e+00   2.766  1.769   0.2280    
## s(TotalAbundance)      3.598e+00   4.435 61.258  < 2e-16 ***
## s(IndividID)           2.142e+01 234.000  0.111   0.0611 .  
## s(GroupAtSampling)     1.418e+01  41.000  1.126 1.59e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.475   Deviance explained = 49.8%
## fREML = 1588.1  Scale est. = 0.8876    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 12 iterations.
## Gradient range [-8.95451e-06,2.1666e-06]
## (score 1588.072 & scale 0.8875976).
## Hessian positive definite, eigenvalue range [1.87015e-06,565.7994].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    1.01    0.55
## s(month)               8.00e+00 2.22e-05    0.98    0.23
## s(AgeAtSampling)       9.00e+00 3.55e+00    1.05    0.94
## s(Seq_depth_nospikein) 9.00e+00 1.29e+00    0.99    0.40
## s(Year)                4.00e+00 2.30e+00    0.98    0.26
## s(TotalAbundance)      9.00e+00 3.60e+00    0.97    0.14
## s(IndividID)           2.35e+02 2.14e+01      NA      NA
## s(GroupAtSampling)     4.20e+01 1.42e+01      NA      NA
## [1] 18
## [1] "Paeniclostridium"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.49694    0.10139  24.627   <2e-16 ***
## StorageFROZEN  0.04597    0.14341   0.321   0.7486    
## Seq_runRUN2   -0.25263    0.12579  -2.008   0.0448 *  
## Seq_runRUN3   -0.19471    0.12887  -1.511   0.1311    
## Seq_runRUN4   -0.09860    0.23997  -0.411   0.6812    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)   4.1222   5.033 17.306  < 2e-16 ***
## s(month)               0.1845   8.000  0.025   0.3283    
## s(AgeAtSampling)       1.1339   1.253 17.333 3.35e-05 ***
## s(Seq_depth_nospikein) 1.0000   1.000  0.384   0.5356    
## s(Year)                1.5779   1.916  0.480   0.6064    
## s(TotalAbundance)      3.2783   4.077 27.978  < 2e-16 ***
## s(IndividID)           6.7377 234.000  0.030   0.3295    
## s(GroupAtSampling)     8.1144  41.000  0.388   0.0114 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.33   Deviance explained = 34.7%
## fREML = 1967.6  Scale est. = 1.7825    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 11 iterations.
## Gradient range [-1.156708e-05,3.012972e-06]
## (score 1967.551 & scale 1.782474).
## Hessian positive definite, eigenvalue range [1.156693e-05,565.5561].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                             k'     edf k-index p-value  
## s(HoursAfterSunrise)     9.000   4.122    1.04   0.895  
## s(month)                 8.000   0.184    0.97   0.105  
## s(AgeAtSampling)         9.000   1.134    1.04   0.920  
## s(Seq_depth_nospikein)   9.000   1.000    1.00   0.450  
## s(Year)                  4.000   1.578    0.94   0.025 *
## s(TotalAbundance)        9.000   3.278    1.03   0.870  
## s(IndividID)           235.000   6.738      NA      NA  
## s(GroupAtSampling)      42.000   8.114      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 19
## [1] "Parabacteroides"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.07191    0.06717  15.958   <2e-16 ***
## StorageFROZEN  0.04670    0.09775   0.478    0.633    
## Seq_runRUN2    0.09750    0.08509   1.146    0.252    
## Seq_runRUN3    0.10732    0.08664   1.239    0.216    
## Seq_runRUN4   -0.02433    0.17493  -0.139    0.889    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)    3.533   4.309  5.505 0.000172 ***
## s(month)                1.970   8.000  0.783 0.023550 *  
## s(AgeAtSampling)        1.000   1.000  0.017 0.897581    
## s(Seq_depth_nospikein)  1.643   2.082  4.828 0.007821 ** 
## s(Year)                 2.081   2.525 15.737  < 2e-16 ***
## s(TotalAbundance)       4.525   5.457 71.820  < 2e-16 ***
## s(IndividID)           23.145 234.000  0.116 0.077786 .  
## s(GroupAtSampling)      3.190  41.000  0.111 0.153944    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.507   Deviance explained = 52.7%
## fREML = 1512.1  Scale est. = 0.78401   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-7.820569e-06,1.601191e-06]
## (score 1512.073 & scale 0.7840062).
## Hessian positive definite, eigenvalue range [7.820453e-06,565.7527].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value    
## s(HoursAfterSunrise)     9.00   3.53    1.02   0.705    
## s(month)                 8.00   1.97    0.92  <2e-16 ***
## s(AgeAtSampling)         9.00   1.00    1.00   0.520    
## s(Seq_depth_nospikein)   9.00   1.64    1.02   0.700    
## s(Year)                  4.00   2.08    0.93   0.010 ** 
## s(TotalAbundance)        9.00   4.53    0.96   0.085 .  
## s(IndividID)           235.00  23.14      NA      NA    
## s(GroupAtSampling)      42.00   3.19      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 20
## [1] "Pediococcus"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.93587    0.09576   9.773  < 2e-16 ***
## StorageFROZEN -0.40785    0.11925  -3.420 0.000649 ***
## Seq_runRUN2   -0.01448    0.10858  -0.133 0.893914    
## Seq_runRUN3   -0.08459    0.11032  -0.767 0.443416    
## Seq_runRUN4   -0.05155    0.20586  -0.250 0.802314    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)    5.281   6.313  1.792 0.093559 .  
## s(month)                2.461   8.000  1.612 0.001556 ** 
## s(AgeAtSampling)        1.000   1.000  1.254 0.263104    
## s(Seq_depth_nospikein)  1.000   1.000  0.000 0.984903    
## s(Year)                 1.000   1.000 12.398 0.000447 ***
## s(TotalAbundance)       1.000   1.000  4.576 0.032633 *  
## s(IndividID)           11.219 234.000  0.053 0.218066    
## s(GroupAtSampling)     14.991  41.000  0.959 0.000157 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0938   Deviance explained = 12.7%
## fREML = 1750.3  Scale est. = 1.19      n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-7.020629e-06,1.941744e-05]
## (score 1750.298 & scale 1.189963).
## Hessian positive definite, eigenvalue range [1.940113e-06,565.6663].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value  
## s(HoursAfterSunrise)     9.00   5.28    0.96   0.085 .
## s(month)                 8.00   2.46    0.97   0.150  
## s(AgeAtSampling)         9.00   1.00    0.98   0.275  
## s(Seq_depth_nospikein)   9.00   1.00    1.04   0.875  
## s(Year)                  4.00   1.00    0.96   0.100 .
## s(TotalAbundance)        9.00   1.00    1.01   0.540  
## s(IndividID)           235.00  11.22      NA      NA  
## s(GroupAtSampling)      42.00  14.99      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 21
## [1] "Peptoclostridium"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.79789    0.12575   6.345  3.3e-10 ***
## StorageFROZEN -0.24745    0.13333  -1.856   0.0637 .  
## Seq_runRUN2    0.02926    0.13279   0.220   0.8257    
## Seq_runRUN3    0.10867    0.13293   0.818   0.4138    
## Seq_runRUN4    0.10022    0.20244   0.495   0.6207    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000  0.951 0.329662    
## s(month)               9.156e-06   8.000  0.000 0.641092    
## s(AgeAtSampling)       1.000e+00   1.000  1.351 0.245321    
## s(Seq_depth_nospikein) 1.000e+00   1.000  0.864 0.352836    
## s(Year)                2.903e+00   3.361 10.054 6.39e-07 ***
## s(TotalAbundance)      2.479e+00   3.100 12.843  < 2e-16 ***
## s(IndividID)           5.494e+01 234.000  0.381 0.000306 ***
## s(GroupAtSampling)     1.902e+01  41.000  3.871  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.252   Deviance explained = 30.9%
## fREML = 1789.8  Scale est. = 1.2121    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-6.619446e-06,7.861149e-06]
## (score 1789.839 & scale 1.212074).
## Hessian positive definite, eigenvalue range [1.763774e-06,567.0145].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value    
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    1.03    0.77    
## s(month)               8.00e+00 9.16e-06    0.93  <2e-16 ***
## s(AgeAtSampling)       9.00e+00 1.00e+00    1.01    0.68    
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    1.05    0.96    
## s(Year)                4.00e+00 2.90e+00    0.93  <2e-16 ***
## s(TotalAbundance)      9.00e+00 2.48e+00    1.02    0.67    
## s(IndividID)           2.35e+02 5.49e+01      NA      NA    
## s(GroupAtSampling)     4.20e+01 1.90e+01      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 22
## [1] "Peptococcus"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.25520    0.07706  29.266   <2e-16 ***
## StorageFROZEN -0.08868    0.10000  -0.887    0.375    
## Seq_runRUN2   -0.04861    0.09686  -0.502    0.616    
## Seq_runRUN3    0.13087    0.10042   1.303    0.193    
## Seq_runRUN4   -0.12682    0.15630  -0.811    0.417    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df       F p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000   1.122  0.2898    
## s(month)               4.582e-06   8.000   0.000  0.7422    
## s(AgeAtSampling)       4.146e+00   4.990  28.706  <2e-16 ***
## s(Seq_depth_nospikein) 1.000e+00   1.000   5.087  0.0243 *  
## s(Year)                2.082e+00   2.510   4.013  0.0114 *  
## s(TotalAbundance)      1.746e+00   2.184 116.941  <2e-16 ***
## s(IndividID)           3.876e+01 234.000   0.215  0.0131 *  
## s(GroupAtSampling)     5.695e+00  41.000   0.295  0.0255 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.435   Deviance explained = 46.4%
## fREML = 1525.3  Scale est. = 0.79314   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-5.553717e-06,6.157545e-07]
## (score 1525.284 & scale 0.7931389).
## Hessian positive definite, eigenvalue range [1.494603e-06,566.1876].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    1.02    0.76
## s(month)               8.00e+00 4.58e-06    1.01    0.64
## s(AgeAtSampling)       9.00e+00 4.15e+00    1.03    0.84
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    0.98    0.27
## s(Year)                4.00e+00 2.08e+00    1.01    0.60
## s(TotalAbundance)      9.00e+00 1.75e+00    0.99    0.41
## s(IndividID)           2.35e+02 3.88e+01      NA      NA
## s(GroupAtSampling)     4.20e+01 5.70e+00      NA      NA
## [1] 23
## [1] "Phascolarctobacterium"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.893665   0.068679  27.573   <2e-16 ***
## StorageFROZEN  0.064789   0.095786   0.676    0.499    
## Seq_runRUN2    0.025142   0.089070   0.282    0.778    
## Seq_runRUN3   -0.004265   0.093288  -0.046    0.964    
## Seq_runRUN4   -0.026858   0.157739  -0.170    0.865    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df       F p-value    
## s(HoursAfterSunrise)   1.000e+00   1.000  10.139 0.00149 ** 
## s(month)               1.866e-05   8.000   0.000 0.52490    
## s(AgeAtSampling)       4.458e+00   5.353   6.233 9.3e-06 ***
## s(Seq_depth_nospikein) 1.099e+00   1.190   1.042 0.36634    
## s(Year)                1.000e+00   1.000  32.143 < 2e-16 ***
## s(TotalAbundance)      4.470e+00   5.404 110.006 < 2e-16 ***
## s(IndividID)           1.609e+01 234.000   0.078 0.13802    
## s(GroupAtSampling)     3.407e+00  41.000   0.120 0.13560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.557   Deviance explained = 57.1%
## fREML = 1535.7  Scale est. = 0.82549   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-5.83792e-06,8.627223e-06]
## (score 1535.702 & scale 0.8254942).
## Hessian positive definite, eigenvalue range [1.786395e-06,565.6303].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value  
## s(HoursAfterSunrise)   9.00e+00 1.00e+00    0.98   0.180  
## s(month)               8.00e+00 1.87e-05    0.97   0.170  
## s(AgeAtSampling)       9.00e+00 4.46e+00    0.99   0.280  
## s(Seq_depth_nospikein) 9.00e+00 1.10e+00    0.97   0.155  
## s(Year)                4.00e+00 1.00e+00    0.94   0.025 *
## s(TotalAbundance)      9.00e+00 4.47e+00    0.94   0.025 *
## s(IndividID)           2.35e+02 1.61e+01      NA      NA  
## s(GroupAtSampling)     4.20e+01 3.41e+00      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 24
## [1] "Rare taxa"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.60894    0.02589 139.409  < 2e-16 ***
## StorageFROZEN -0.04196    0.03996  -1.050    0.294    
## Seq_runRUN2    0.14696    0.03301   4.453 9.34e-06 ***
## Seq_runRUN3    0.13124    0.03353   3.914 9.64e-05 ***
## Seq_runRUN4    0.09722    0.06336   1.534    0.125    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df       F  p-value    
## s(HoursAfterSunrise)   3.617e+00   4.427   5.210 0.000248 ***
## s(month)               2.299e+00   8.000   1.369 0.001935 ** 
## s(AgeAtSampling)       1.234e+00   1.429   0.945 0.462304    
## s(Seq_depth_nospikein) 1.000e+00   1.000   0.117 0.732247    
## s(Year)                2.541e+00   3.067   7.217  6.7e-05 ***
## s(TotalAbundance)      5.598e+00   6.555 123.943  < 2e-16 ***
## s(IndividID)           7.835e+00 234.000   0.035 0.303405    
## s(GroupAtSampling)     1.642e-05  41.000   0.000 0.571876    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.568   Deviance explained = 57.8%
## fREML = 491.92  Scale est. = 0.13077   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-2.629817e-06,1.682781e-06]
## (score 491.9209 & scale 0.1307729).
## Hessian positive definite, eigenvalue range [1.124065e-06,565.543].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value  
## s(HoursAfterSunrise)   9.00e+00 3.62e+00    1.07    0.99  
## s(month)               8.00e+00 2.30e+00    0.95    0.05 *
## s(AgeAtSampling)       9.00e+00 1.23e+00    1.01    0.62  
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    1.02    0.75  
## s(Year)                4.00e+00 2.54e+00    0.93    0.02 *
## s(TotalAbundance)      9.00e+00 5.60e+00    0.99    0.41  
## s(IndividID)           2.35e+02 7.83e+00      NA      NA  
## s(GroupAtSampling)     4.20e+01 1.64e-05      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 25
## [1] "Rikenellaceae RC9 gut group"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.28901    0.07144  18.042   <2e-16 ***
## StorageFROZEN -0.02868    0.10234  -0.280    0.779    
## Seq_runRUN2    0.12756    0.09049   1.410    0.159    
## Seq_runRUN3    0.13899    0.09317   1.492    0.136    
## Seq_runRUN4    0.07412    0.15965   0.464    0.643    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)   3.315e+00   4.047  4.897 0.000666 ***
## s(month)               2.044e+00   8.000  0.734 0.032035 *  
## s(AgeAtSampling)       3.853e+00   4.681 16.251  < 2e-16 ***
## s(Seq_depth_nospikein) 1.000e+00   1.000  5.281 0.021752 *  
## s(Year)                2.830e+00   3.344  9.646 1.53e-06 ***
## s(TotalAbundance)      4.176e+00   5.085 90.727  < 2e-16 ***
## s(IndividID)           2.135e+01 234.000  0.107 0.075758 .  
## s(GroupAtSampling)     1.554e-04  41.000  0.000 0.456840    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.543   Deviance explained =   56%
## fREML = 1529.7  Scale est. = 0.80878   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 17 iterations.
## Gradient range [-2.59411e-06,1.416098e-05]
## (score 1529.662 & scale 0.808778).
## Hessian positive definite, eigenvalue range [1.985874e-06,565.7159].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value  
## s(HoursAfterSunrise)   9.00e+00 3.32e+00    1.04   0.915  
## s(month)               8.00e+00 2.04e+00    0.97   0.095 .
## s(AgeAtSampling)       9.00e+00 3.85e+00    1.00   0.455  
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    1.01   0.635  
## s(Year)                4.00e+00 2.83e+00    0.97   0.130  
## s(TotalAbundance)      9.00e+00 4.18e+00    1.01   0.575  
## s(IndividID)           2.35e+02 2.14e+01      NA      NA  
## s(GroupAtSampling)     4.20e+01 1.55e-04      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 26
## [1] "Romboutsia"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.38755    0.10426  22.900  < 2e-16 ***
## StorageFROZEN  0.17059    0.14851   1.149  0.25094    
## Seq_runRUN2   -0.40537    0.12555  -3.229  0.00128 ** 
## Seq_runRUN3   -0.28043    0.12697  -2.209  0.02740 *  
## Seq_runRUN4   -0.05848    0.26834  -0.218  0.82753    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf  Ref.df      F p-value    
## s(HoursAfterSunrise)   2.54353   3.142  3.947 0.00746 ** 
## s(month)               1.93710   8.000  0.794 0.01854 *  
## s(AgeAtSampling)       1.66523   2.079 16.882 < 2e-16 ***
## s(Seq_depth_nospikein) 2.27958   2.942  0.535 0.60366    
## s(Year)                3.17335   3.645  4.945 0.00158 ** 
## s(TotalAbundance)      3.81355   4.695 32.660 < 2e-16 ***
## s(IndividID)           0.07013 234.000  0.000 0.46926    
## s(GroupAtSampling)     7.32084  41.000  0.311 0.02639 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.305   Deviance explained = 32.2%
## fREML = 1935.8  Scale est. = 1.6875    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 19 iterations.
## Gradient range [-1.624929e-08,7.372425e-07]
## (score 1935.836 & scale 1.687487).
## Hessian positive definite, eigenvalue range [1.383488e-05,565.533].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value    
## s(HoursAfterSunrise)     9.0000   2.5435    0.99    0.38    
## s(month)                 8.0000   1.9371    0.93  <2e-16 ***
## s(AgeAtSampling)         9.0000   1.6652    0.95    0.03 *  
## s(Seq_depth_nospikein)   9.0000   2.2796    1.00    0.49    
## s(Year)                  4.0000   3.1733    0.90  <2e-16 ***
## s(TotalAbundance)        9.0000   3.8136    1.04    0.91    
## s(IndividID)           235.0000   0.0701      NA      NA    
## s(GroupAtSampling)      42.0000   7.3208      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 27
## [1] "Ruminococcaceae UCG-014"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.70017    0.08744  19.444   <2e-16 ***
## StorageFROZEN -0.09706    0.11767  -0.825    0.410    
## Seq_runRUN2    0.05508    0.10678   0.516    0.606    
## Seq_runRUN3    0.03141    0.10985   0.286    0.775    
## Seq_runRUN4   -0.21265    0.17822  -1.193    0.233    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)    1.000   1.000  0.115 0.734354    
## s(month)                2.921   8.000  1.923 0.000805 ***
## s(AgeAtSampling)        3.580   4.372  5.099 0.000378 ***
## s(Seq_depth_nospikein)  1.000   1.000  7.753 0.005453 ** 
## s(Year)                 2.668   3.175  3.925 0.007476 ** 
## s(TotalAbundance)       1.000   1.000 72.982  < 2e-16 ***
## s(IndividID)           14.559 234.000  0.069 0.190576    
## s(GroupAtSampling)      8.078  41.000  0.434 0.005649 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.179   Deviance explained = 20.7%
## fREML = 1685.9  Scale est. = 1.073     n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-5.410251e-06,3.327954e-06]
## (score 1685.9 & scale 1.073017).
## Hessian positive definite, eigenvalue range [2.140421e-06,565.6307].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value  
## s(HoursAfterSunrise)     9.00   1.00    1.00   0.525  
## s(month)                 8.00   2.92    0.94   0.020 *
## s(AgeAtSampling)         9.00   3.58    0.96   0.065 .
## s(Seq_depth_nospikein)   9.00   1.00    1.00   0.470  
## s(Year)                  4.00   2.67    0.96   0.095 .
## s(TotalAbundance)        9.00   1.00    1.01   0.695  
## s(IndividID)           235.00  14.56      NA      NA  
## s(GroupAtSampling)      42.00   8.08      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 28
## [1] "Slackia"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.29955    0.04510  50.992  < 2e-16 ***
## StorageFROZEN -0.19307    0.06425  -3.005  0.00272 ** 
## Seq_runRUN2    0.13824    0.05753   2.403  0.01644 *  
## Seq_runRUN3    0.14954    0.05895   2.537  0.01133 *  
## Seq_runRUN4    0.30790    0.12722   2.420  0.01567 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df      F p-value    
## s(HoursAfterSunrise)    2.390   2.965  2.434 0.05295 .  
## s(month)                2.595   8.000  0.896 0.03076 *  
## s(AgeAtSampling)        1.000   1.001  0.000 0.99768    
## s(Seq_depth_nospikein)  2.467   3.159  1.372 0.25825    
## s(Year)                 1.000   1.000  7.605 0.00592 ** 
## s(TotalAbundance)       4.901   5.847 91.653 < 2e-16 ***
## s(IndividID)           25.937 234.000  0.132 0.06426 .  
## s(GroupAtSampling)      4.183  41.000  0.146 0.16861    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.501   Deviance explained = 52.2%
## fREML = 1068.1  Scale est. = 0.35619   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-2.397232e-06,1.436141e-05]
## (score 1068.089 & scale 0.3561865).
## Hessian positive definite, eigenvalue range [2.088478e-06,565.8175].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value  
## s(HoursAfterSunrise)     9.00   2.39    0.99   0.390  
## s(month)                 8.00   2.59    0.97   0.105  
## s(AgeAtSampling)         9.00   1.00    1.00   0.425  
## s(Seq_depth_nospikein)   9.00   2.47    0.96   0.085 .
## s(Year)                  4.00   1.00    1.03   0.810  
## s(TotalAbundance)        9.00   4.90    0.98   0.195  
## s(IndividID)           235.00  25.94      NA      NA  
## s(GroupAtSampling)      42.00   4.18      NA      NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 29
## [1] "Turicibacter"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.2240     0.1136  10.773   <2e-16 ***
## StorageFROZEN  -0.1063     0.1392  -0.763    0.445    
## Seq_runRUN2     0.1653     0.1291   1.280    0.201    
## Seq_runRUN3    -0.1153     0.1306  -0.883    0.378    
## Seq_runRUN4    -0.2564     0.2196  -1.167    0.243    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df     F  p-value    
## s(HoursAfterSunrise)    3.668   4.475 5.624 0.000105 ***
## s(month)                3.532   8.000 3.478 6.78e-06 ***
## s(AgeAtSampling)        1.679   2.080 1.426 0.267365    
## s(Seq_depth_nospikein)  1.000   1.000 0.062 0.804245    
## s(Year)                 2.841   3.326 3.640 0.013399 *  
## s(TotalAbundance)       3.866   4.737 6.022 3.38e-05 ***
## s(IndividID)           25.221 234.000 0.133 0.047406 *  
## s(GroupAtSampling)     14.213  41.000 0.938 0.000534 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.16   Deviance explained = 20.4%
## fREML = 1857.7  Scale est. = 1.4087    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 12 iterations.
## Gradient range [-2.282441e-06,5.13065e-06]
## (score 1857.657 & scale 1.408664).
## Hessian positive definite, eigenvalue range [2.285786e-06,565.8858].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value    
## s(HoursAfterSunrise)     9.00   3.67    0.99   0.330    
## s(month)                 8.00   3.53    0.80  <2e-16 ***
## s(AgeAtSampling)         9.00   1.68    0.94   0.005 ** 
## s(Seq_depth_nospikein)   9.00   1.00    1.04   0.915    
## s(Year)                  4.00   2.84    0.74  <2e-16 ***
## s(TotalAbundance)        9.00   3.87    1.04   0.910    
## s(IndividID)           235.00  25.22      NA      NA    
## s(GroupAtSampling)      42.00  14.21      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 30
## [1] "Tyzzerella 4"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.54132    0.08350  18.458   <2e-16 ***
## StorageFROZEN -0.06704    0.10673  -0.628    0.530    
## Seq_runRUN2   -0.02590    0.10159  -0.255    0.799    
## Seq_runRUN3    0.06984    0.10559   0.661    0.508    
## Seq_runRUN4    0.09654    0.17068   0.566    0.572    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf  Ref.df      F  p-value    
## s(HoursAfterSunrise)   1.255e+00   1.463  0.071 0.935267    
## s(month)               2.027e-05   8.000  0.000 0.842551    
## s(AgeAtSampling)       3.199e+00   3.931  5.209 0.000537 ***
## s(Seq_depth_nospikein) 1.000e+00   1.000  3.949 0.047161 *  
## s(Year)                1.292e+00   1.494 13.588 0.000249 ***
## s(TotalAbundance)      3.904e+00   4.785 57.959  < 2e-16 ***
## s(IndividID)           1.681e+01 234.000  0.082 0.144724    
## s(GroupAtSampling)     1.070e+01  41.000  0.593 0.003595 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.389   Deviance explained = 41.2%
## fREML = 1630.2  Scale est. = 0.96918   n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-7.828685e-06,3.235263e-06]
## (score 1630.165 & scale 0.9691812).
## Hessian positive definite, eigenvalue range [2.140878e-06,565.6819].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf k-index p-value    
## s(HoursAfterSunrise)   9.00e+00 1.26e+00    0.98    0.23    
## s(month)               8.00e+00 2.03e-05    0.93  <2e-16 ***
## s(AgeAtSampling)       9.00e+00 3.20e+00    1.01    0.57    
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00    1.02    0.76    
## s(Year)                4.00e+00 1.29e+00    0.91  <2e-16 ***
## s(TotalAbundance)      9.00e+00 3.90e+00    0.97    0.17    
## s(IndividID)           2.35e+02 1.68e+01      NA      NA    
## s(GroupAtSampling)     4.20e+01 1.07e+01      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 31
## [1] "Weissella"
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month, 
##     bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein, 
##     bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance, 
##     bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") + 
##     s(GroupAtSampling, bs = "re")
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.01397    0.10333   9.813   <2e-16 ***
## StorageFROZEN -0.21030    0.12512  -1.681   0.0931 .  
## Seq_runRUN2   -0.14249    0.11839  -1.204   0.2290    
## Seq_runRUN3   -0.02069    0.12080  -0.171   0.8640    
## Seq_runRUN4    0.18444    0.20279   0.909   0.3633    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf  Ref.df     F  p-value    
## s(HoursAfterSunrise)    1.000   1.000 0.570  0.45031    
## s(month)                5.232   8.000 6.727  < 2e-16 ***
## s(AgeAtSampling)        1.425   1.725 3.020  0.11953    
## s(Seq_depth_nospikein)  1.097   1.186 0.155  0.83225    
## s(Year)                 1.000   1.000 1.568  0.21083    
## s(TotalAbundance)       2.750   3.441 4.256  0.00387 ** 
## s(IndividID)           23.171 234.000 0.119  0.07317 .  
## s(GroupAtSampling)     14.958  41.000 1.114 5.85e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.127   Deviance explained = 16.9%
## fREML = 1794.6  Scale est. = 1.2686    n = 1141

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-3.196111e-06,1.453356e-05]
## (score 1794.623 & scale 1.268577).
## Hessian positive definite, eigenvalue range [2.265557e-06,565.851].
## Model rank =  330 / 330 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value    
## s(HoursAfterSunrise)     9.00   1.00    1.03    0.86    
## s(month)                 8.00   5.23    0.92    0.01 ** 
## s(AgeAtSampling)         9.00   1.42    0.94    0.02 *  
## s(Seq_depth_nospikein)   9.00   1.10    1.02    0.74    
## s(Year)                  4.00   1.00    0.89  <2e-16 ***
## s(TotalAbundance)        9.00   2.75    1.03    0.82    
## s(IndividID)           235.00  23.17      NA      NA    
## s(GroupAtSampling)      42.00  14.96      NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
names(year_estimates)<-taxanames

year_estimates_df<-data.frame(do.call(rbind, year_estimates))

This chunk took 1.95 minutes

year_estimates_df$Significant <-year_estimates_df$P_val<0.05

year_estimates_df <- year_estimates_df  %>% dplyr::arrange(-effect_size)


year_estimates_unique<-dplyr::distinct(year_estimates_df, Taxa, .keep_all = T)
dim(year_estimates_unique)
## [1] 31 15
taxa_order<-as.character(year_estimates_unique$Taxa)
year_estimates_df$Taxa<-factor(year_estimates_df$Taxa, levels = taxa_order)


########## calculate effect size manually (difference between early and late)

year_estimates_df$Year2<-substr(year_estimates_df$Year, 1, 4)
calculate_effect_size<-subset(year_estimates_df, Year2 <2001 | Year2 > 2014)
calculate_effect_size<-calculate_effect_size[,c(3,9,10, 16)]
calculate_effect_size$Period <- ifelse(calculate_effect_size$Year2 <2001, "EARLY", "LATE")
table(calculate_effect_size$Period)
## 
## EARLY  LATE 
##  1147  1147
calculate_effect_size<-calculate_effect_size %>% group_by(Taxa, Period) %>% summarize(Mean_est = mean(est_real))

calculate_effect_size_wide<- spread(calculate_effect_size, Period, Mean_est)
head(calculate_effect_size_wide)
calculate_effect_size_wide$Effect<- calculate_effect_size_wide$LATE - calculate_effect_size_wide$EARLY

calculate_effect_size_wide<-merge(calculate_effect_size_wide, year_estimates_df, by = "Taxa")

calculate_effect_size_wide<-distinct(calculate_effect_size_wide, Taxa, .keep_all = T)


calculate_effect_size_wide<-calculate_effect_size_wide %>%arrange(Effect)

taxa_order<-as.character(calculate_effect_size_wide$Taxa)

This chunk took 0 minutes

################### plot 
################### plot 
################### plot 

# first add class column so can match colours with earlier figs

taxonomy<-data.frame(tax_table(genus_ps))
tax<-taxonomy[,c("Genus", "Class")]
tax<-distinct(tax, Genus, .keep_all = T)

calculate_effect_size_wide$Class <- vlookup(calculate_effect_size_wide$Taxa, tax, lookup_column = "Genus", result_column = "Class")

calculate_effect_size_wide$Class_plot<-ifelse(calculate_effect_size_wide$Class %in% top_class, calculate_effect_size_wide$Class, "Other")

calculate_effect_size_wide$Class_plot<-factor(calculate_effect_size_wide$Class_plot, levels = c("Bacteroidia","Fusobacteriia" ,  "Other","Gammaproteobacteria", "Clostridia",  
                                                                                                "Coriobacteriia","Actinobacteria",    "Bacilli") )

## 
calculate_effect_size_wide$Sig <-ifelse(calculate_effect_size_wide$Significant==T, "yes", "no")


plot_tb_1<-ggplot(subset(calculate_effect_size_wide, Taxa != "uncultured"), aes(y = reorder(Taxa, Effect), x = Effect))+
  theme_bw(base_size = 14)+
  geom_bar_pattern(stat = "identity", aes(fill = Class_plot, pattern = Sig),
                   color = "black", 
                   pattern_fill = "black",
                   pattern_angle = 45,
                   pattern_density = 0.1,
                   pattern_spacing = 0.025,
                   pattern_key_scale_factor = 0.6)+
  
  scale_pattern_manual(values = c(no = "stripe", yes = "none"))+
  scale_fill_manual(values = rev(pal))+
  
  ylab("") +
  theme(plot.margin=unit(c(1,0.2,0.2,0.2),"cm"))+
  xlab("Effect size")+
  labs(pattern = "Significant")+
  labs(fill = "Bacterial class")

plot_tb_1

This chunk took 0.05 minutes

10.0.2 Models with year as a random effect

  • Add variables of interest to models
TB_estimates<-list()
Condition_estimates<-list()
Temp_estimates<-list()
Rainfall_estimates<-list()

taxanames<-unique(top_genera_melt3$Genus2)

for (i in 1:length(taxanames)){
  tryCatch({ #catch errors
    print(i)
    
    taxa1<-subset(top_genera_melt3, Genus2 == taxanames[i])
    # taxa1<-subset(top_genera_melt3, Genus2 == "Fusobacterium")
    
    metadata<-taxa1
    
    #scale variables
    metadata$AgeAtSampling<-as.numeric(scale(log10(metadata$AgeAtSampling)))
    metadata$TotalAbundance<-as.numeric(scale(log10(metadata$TotalAbundance)))
    metadata$Rainfall<-as.numeric(scale(log10(metadata$sum_rainfall_month+1)))
    metadata$Seq_depth_nospikein<-as.numeric(scale(log10(metadata$Seq_depth_nospikein)))
    
    
    metadata$TB_stage<-factor(metadata$TB_stage, levels = c("Unexposed", "Exposed","Diseased"))
    
    metadata$Year<-factor(metadata$Year)
    
    # fit gamm    
    m_taxa <- mgcv::gam(log10(Abundance2+1)~
                          s(HoursAfterSunrise, bs = "cr")+
                          s(month, bs = "cc")+
                          s(AgeAtSampling, bs = "cr")+
                          s(TotalAbundance, bs = "cr")+
                          s(Seq_depth_nospikein, bs = "cr")+
                          TB_stage+
                          Condition_weight+
                          Temp_max+
                          Rainfall+
                          Season+
                          Storage+
                          Seq_run+
                          s(IndividID, bs = "re")+
                          s(Year, bs = "re")+
                          s(GroupAtSampling, bs = "re"),
                        #correlation = corARMA(form = ~ 1|Year, p = 3),
                        data=metadata, 
                        family = gaussian)
    
    summary<-summary(m_taxa)
    
    ## TB estimates 
    
    TB_estimates_df<-data.frame(summary$p.table)[1:3,]
    TB_estimates_df$Level<-c("Unexposed", "Exposed", "Diseased")
    names(TB_estimates_df)[4]<-"P_val"
    
    TB_estimates_df$Estimate2<-ifelse(TB_estimates_df$Level == "Unexposed", 0, TB_estimates_df$Estimate)
    
    
    TB_estimates_df$lower<-NA
    TB_estimates_df$upper<-NA
    
    # confidence intervals exposed
    beta <- coef(m_taxa)
    Vb <- vcov(m_taxa, unconditional = TRUE)
    se <- sqrt(diag(Vb))
    j <- which(names(beta) == "(Intercept)")
    cis<- beta[j] + (c(-1,1) * (2 * se[j]))
    
    TB_estimates_df[1,7]<-cis[1]
    TB_estimates_df[1,8]<-cis[2]
    
    # confidence intervals exposed
    j <- which(names(beta) == "TB_stageExposed")
    cis<- beta[j] + (c(-1,1) * (2 * se[j]))
    
    TB_estimates_df[2,7]<-cis[1]
    TB_estimates_df[2,8]<-cis[2]
    
    # confidence intervals symptomatic
    j <- which(names(beta) == "TB_stageDiseased")
    cis<- beta[j] + (c(-1,1) * (2 * se[j]))
    
    TB_estimates_df[3,7]<-cis[1]
    TB_estimates_df[3,8]<-cis[2]
    
    ## add coefficient
    
    TB_estimates_df$Coef <-as.numeric(coef(m_taxa)[1:3])
    TB_estimates_df$Taxa <-taxanames[i]
    TB_estimates[[i]]<-TB_estimates_df
    
    ## extract condition stats
    
    condition_estimates<- data.frame(summary$p.table)[4,]
    condition_estimates$Taxa<-taxanames[i]
    Condition_estimates[[i]]<-condition_estimates
    
    ## extract max temp stats
    
    temp_estimates<- data.frame(summary$p.table)[5,]
    temp_estimates$Taxa<-taxanames[i]
    Temp_estimates[[i]]<-temp_estimates
    
    ## extract rainfall stats
    
    rainfall_estimates<- data.frame(summary$p.table)[6,]
    rainfall_estimates$Taxa<-taxanames[i]
    Rainfall_estimates[[i]]<-rainfall_estimates
    
    
  }, error=function(e){})
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31

This chunk took 22.03 minutes

10.0.3 TB estimates

names(TB_estimates)<-taxanames

TB_estimates_df<-do.call(rbind, TB_estimates)


TB_estimates_df$Significant <-TB_estimates_df$P_val < 0.05

# edit significance (intercept will always be significant)

sig_taxa<-subset(TB_estimates_df, Significant == T & Level !="Unexposed")
sig_taxa<-unique(sig_taxa$Taxa)

TB_estimates_df$Significant<- TB_estimates_df$Taxa %in%  sig_taxa


TB_estimates_df
## edit confidence intervals

TB_estimates_df$Level<-factor(TB_estimates_df$Level, levels = c("Diseased", "Exposed", "Unexposed"))

TB_estimates_df$lower<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$lower-TB_estimates_df$Estimate, TB_estimates_df$lower)

TB_estimates_df$upper<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$upper-TB_estimates_df$Estimate, TB_estimates_df$upper)

# order taxa

TB_estimates_df$Taxa<-factor(TB_estimates_df$Taxa, levels = taxa_order)

#############
#############
#############

plot_tb_2<-ggplot(subset(TB_estimates_df, Taxa!= "uncultured"), aes( x = Estimate2, y = Taxa, group = Level))+
  theme_bw(base_size = 14)+
  geom_vline(xintercept = 0, linetype = "dashed")+
  geom_errorbarh(aes(col = Level, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, position=position_dodge(width=0.5))+
  
  geom_errorbarh(aes(alpha = Significant, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, col = "grey", position=position_dodge(width=0.5))+
  
  scale_alpha_discrete(range = c(1,0), guide = "none")+
  
  geom_point(aes(col = Level), size = 2, position=position_dodge(width=0.5))+
  
  scale_color_manual(values = c( "brown2", "skyblue","forestgreen"),  guide = guide_legend(reverse = TRUE))+
  xlim(c(-0.9,NA))+
  
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
  xlab("Effect size")+
  labs(col = "TB stage")+
  theme(plot.margin=unit(c(1,0.2,0.2,0.2),"cm"))+
  # legend
  theme( legend.background = element_blank(),
         legend.box.background = element_rect(colour = "grey", fill = "white"))+ 
  theme(legend.position=c(0.2,0.95),  legend.direction="vertical", 
        legend.key.height = unit(0.05, 'cm'),
        legend.key.width = unit(0.02, 'cm'),
        legend.title=element_text(size=8),
        legend.text =element_text(size=8))+
  guides(colour=guide_legend(title.position="top",                           title.hjust =0.5,reverse = TRUE))+
  coord_cartesian(xlim = c(-0.7, 0.7))  +
  scale_x_continuous(breaks = c(-0.3, 0, 0.3))+
  theme(legend.margin=margin(t = c(0.1,0.1,0.1,0.1), unit='cm'))


plot_tb_2

This chunk took 0.05 minutes

10.0.4 Body condition estimates

names(Condition_estimates)<-taxanames

condition_estimates_df<-do.call(rbind, Condition_estimates)

condition_estimates_df$Significant <-condition_estimates_df$Pr...t.. < 0.05


condition_estimates_df$Taxa<-factor(condition_estimates_df$Taxa, levels = taxa_order)


plot_condition<-
  ggplot(subset(condition_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
  theme_bw(base_size = 14)+
  geom_point( aes(col = Significant), size = 3)+
  geom_errorbarh(aes(col = Significant, xmin = ((Estimate- Std..Error)-0.0005), xmax = ((Estimate+ Std..Error)+0.0005) ), height = 0, size = 1)+
  geom_vline(xintercept = 0, linetype = "dashed")+
  scale_color_manual(values = c("grey", "blue"))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
  xlab("Effect size")+
  xlim(c(-0.004,NA))+
  #legend
  theme( legend.background = element_blank(),
         legend.box.background = element_rect(colour = "grey", fill = "white"))+
  theme(legend.position=c(0.8,0.95), legend.direction="vertical", 
        
        legend.key.height = unit(0.02, 'cm'),
        legend.key.width = unit(0.02, 'cm'),
        legend.title=element_text(size=8),
        legend.text =element_text(size=6))+
  guides(colour=guide_legend(title.position="top", reverse = T,
                             title.hjust =0.5))+
  coord_cartesian(xlim = c(-0.01, 0.01))+
  scale_x_continuous(breaks = c(-0.005, 0, 0.005))

plot_condition

This chunk took 0.02 minutes

10.0.5 Maximum temp estimates

names(Temp_estimates)<-taxanames

temp_estimates_df<-do.call(rbind, Temp_estimates)

temp_estimates_df$Significant <-temp_estimates_df$Pr...t.. < 0.05


temp_estimates_df$Taxa<-factor(temp_estimates_df$Taxa, levels = taxa_order)



plot_temp<-
  ggplot(subset(temp_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
  theme_bw(base_size = 14)+
  geom_point( aes(col = Significant), size = 3)+
  geom_errorbarh(aes(col = Significant, xmin = Estimate- Std..Error, xmax = Estimate+ Std..Error), height = 0, size = 1)+
  geom_vline(xintercept = 0, linetype = "dashed")+
  scale_color_manual(values = c("grey", "blue"))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
  xlab("Effect size")+
  # xlim(c(-0.004,NA))+
  #legend
  theme( legend.background = element_blank(),
         legend.box.background = element_rect(colour = "grey", fill = "white"))+
  theme(legend.position=c(0.8,0.95), legend.direction="vertical", 
        legend.key.height = unit(0.02, 'cm'),
        legend.key.width = unit(0.02, 'cm'),
        legend.title=element_text(size=8),
        legend.text =element_text(size=6))+
  guides(colour=guide_legend(title.position="top", reverse = T,
                             title.hjust =0.5))+
  coord_cartesian(xlim = c(-0.1, 0.1))+
  scale_x_continuous(breaks = c(-0.05, 0, 0.05))

plot_temp

This chunk took 0.02 minutes

10.0.6 Rainfall estimates

names(Rainfall_estimates)<-taxanames

rainfall_estimates_df<-do.call(rbind, Rainfall_estimates)

rainfall_estimates_df$Significant <-rainfall_estimates_df$Pr...t.. < 0.05

rainfall_estimates_df$Taxa<-factor(rainfall_estimates_df$Taxa, levels = taxa_order)


plot_rainfall<-
  ggplot(subset(rainfall_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
  theme_bw(base_size = 14)+
  geom_point( aes(col = Significant), size = 3)+
  geom_errorbarh(aes(col = Significant, xmin = Estimate- Std..Error, xmax = Estimate+ Std..Error), height = 0, size = 1)+
  geom_vline(xintercept = 0, linetype = "dashed")+
  scale_color_manual(values = c("grey", "blue"))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
  xlab("Effect size")+
  # xlim(c(-0.004,NA))+
  #legend
  theme( legend.background = element_blank(),
         legend.box.background = element_rect(colour = "grey", fill = "white"))+
  theme(legend.position=c(0.8,0.95), legend.direction="vertical", 
        legend.key.height = unit(0.02, 'cm'),
        legend.key.width = unit(0.02, 'cm'),
        legend.title=element_text(size=8),
        legend.text =element_text(size=6))+
  guides(colour=guide_legend(title.position="top", reverse = T,
                             title.hjust =0.5))+
  coord_cartesian(xlim = c(-0.5, 0.5))+
  scale_x_continuous(breaks = c(-0.3, 0, 0.3))

plot_rainfall

This chunk took 0.01 minutes

10.0.7 Fig. 4: Plot estimates together

# Combining plots
rm_legend <- function(p){p + theme(legend.position = "none")}

ggarrange(rm_legend(plot_tb_1), plot_tb_2, plot_rainfall, plot_temp, plot_condition, nrow = 1, align = "h", widths = c(2.2,1,1,1, 1), labels = c("a) Year", "b) TB", "c) Rainfall","d) Max. temp.", "e) Body condition"), label.x = c(0.45,-0.05,-0.15,-0.25, -0.32))

#ggsave("Figures/Fig4.pdf")

This chunk took 0.21 minutes

11 Network analysis

*Need to do CLR analysis from here as this accounts for bacterial load

#### extract site x species array

meerkat_filtered_genus<-tax_glom(meerkat_filtered, taxrank = "Genus")
genus_core<-prune_taxa(top_genera, meerkat_filtered_genus)
taxtable<-data.frame(tax_table(genus_core))
taxa_names(genus_core)<-taxtable$Genus

phylo<-genus_core

taxtable<-data.frame(tax_table(phylo))

taxa_names(phylo)<-taxtable$Genus

phylo_array<-data.frame(t(phylo@otu_table@.Data))


#phylo_array[1:5,1:5]
names(phylo_array)<-taxa_names(phylo)

dim(phylo_array)
## [1] 1141   30
########################## 

#### genreate association matrix from the 'real' data
# this uses the package NetCoMi


net_real <- netConstruct(phylo_array, verbose = 0, 
                         measure = "spearman",
                         normMethod = "clr",
                         zeroMethod = "none",
                         sparsMethod = "threshold",
                         thresh = 0.3)


props_single <- netAnalyze(net_real, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, 
                           normDeg = FALSE)



plot(props_single, 
     nodeColor = "cluster", 
     layout = "spring",
     repulsion = 1,
     # nodeSize = "eigenvector",
     title1 = F, 
     #showTitle = TRUE,
     cexTitle = 2.3,
     shortenLabels = "simple",
     labelScale = F,
     nodeSize = "fix",
     colorVec = c("orchid", "skyblue", "orange", "lightgreen", "pink"),
     nodeTransp = 0,
     borderCol = "white",
     borderWidth = 3,
     highlightHubs = F ,
     edgeTranspLow = 0)

legend(0.7, 1.1, cex = 1, title = "Direction", 
       legend = c("+","-"), lty = 1, lwd = 1, col = c("#009900","red"), 
       bty = "n", horiz = F)

cluster_memb<-data.frame(props_single$clustering$clust1)

head(cluster_memb, 20)
names(cluster_memb)<-"Cluster"

# make singletons their own cluster (not zero)

cluster_memb$Cluster<-ifelse(row.names(cluster_memb)=="Escherichia-Shigella", 6,cluster_memb$Cluster )
cluster_memb$Cluster<-ifelse(row.names(cluster_memb)=="Peptoclostridium", 7,cluster_memb$Cluster )
cluster_memb$Cluster<-ifelse(row.names(cluster_memb)=="Ruminococcaceae UCG-014", 8,cluster_memb$Cluster )


cluster_memb$Cluster2<-case_when(cluster_memb == 1 ~ "Lactococcus cluster",
                                 cluster_memb == 2 ~ "Bacteroides cluster",
                                 cluster_memb == 3 ~ "Blautia cluster",
                                 cluster_memb == 4 ~ "Clostridium cluster",
                                 cluster_memb == 5 ~ "Bacillus cluster",
                                 cluster_memb == 6 ~ "Escherichia-Shigella",
                                 cluster_memb == 7 ~ "Peptoclostridium",
                                 cluster_memb == 8 ~ "Ruminococcus UCG-014" )

This chunk took 0.25 minutes

12 Survival analysis

meerkat_filtered_genus<-tax_glom(meerkat_filtered, taxrank = "Genus")
genus_core<-prune_taxa(top_genera, meerkat_filtered_genus)
taxtable<-data.frame(tax_table(genus_core))
taxa_names(genus_core)<-taxtable$Genus


### 1) Genus level
### 1) Genus level
### 1) Genus level
### 1) Genus level

survival_df<-data.frame(sample_data(genus_core))
head(survival_df)
# calculate if ID is alive 6 months later, TRUE = dead

survival_df$Time_180<-survival_df$SampleDate+180
survival_df$Survival_180<- ifelse(survival_df$Time_180 < survival_df$DeathDate, FALSE,TRUE)


table(survival_df$Survival_180)
## 
## FALSE  TRUE 
##   811   330
survival_df$FollowUp_180<-180


## add on genus level abundance 
genus_clr<-microbiome::transform(genus_core, transform = "clr")

otutable<-data.frame(t(otu_table(genus_clr)))
otutable[1:5, 1:5]
names(otutable)<-taxa_names(genus_clr)
names(otutable)<-make.names(names(otutable),unique = TRUE)
otutable<-data.frame(scale(otutable))
#hist(otutable$Helicobacter)

survival_df_genus<-merge(survival_df, otutable, by = 0)
head(survival_df_genus)
# model 

names(survival_df_genus)
##  [1] "Row.names"                     "feature.id"                   
##  [3] "IndividID"                     "SampleDate"                   
##  [5] "SampleTime"                    "Storage"                      
##  [7] "Seq_run"                       "Imtechella"                   
##  [9] "Allobacillus"                  "Seq_depth"                    
## [11] "scaling_factor"                "Seq_depth_nospikein"          
## [13] "BirthDate"                     "DeathDate"                    
## [15] "AgeAtSampling"                 "GroupAtSampling"              
## [17] "Condition_weight"              "SocialStatus"                 
## [19] "Temp_max"                      "sum_rainfall_month"           
## [21] "HoursAfterSunrise"             "TimeCat"                      
## [23] "Year"                          "month"                        
## [25] "Season"                        "TB_status"                    
## [27] "TB_exposure"                   "TB_resistance"                
## [29] "TB_survival"                   "TB_symptom_date"              
## [31] "TB_exposure_date"              "TB_stage"                     
## [33] "TotalAbundance"                "Time_180"                     
## [35] "Survival_180"                  "FollowUp_180"                 
## [37] "Helicobacter"                  "Geodermatophilus"             
## [39] "Fusobacterium"                 "Tyzzerella.4"                 
## [41] "X.Ruminococcus..torques.group" "Blautia"                      
## [43] "Marvinbryantia"                "Lachnoclostridium"            
## [45] "Lachnospiraceae.NK4A136.group" "Rikenellaceae.RC9.gut.group"  
## [47] "Bacteroides"                   "Alloprevotella"               
## [49] "Parabacteroides"               "Paeniclostridium"             
## [51] "Romboutsia"                    "Peptoclostridium"             
## [53] "Collinsella"                   "Enterococcus"                 
## [55] "Pediococcus"                   "Weissella"                    
## [57] "Bacillus"                      "Turicibacter"                 
## [59] "Catenisphaera"                 "Lactococcus"                  
## [61] "Clostridium.sensu.stricto.1"   "Peptococcus"                  
## [63] "Phascolarctobacterium"         "Ruminococcaceae.UCG.014"      
## [65] "Slackia"                       "Escherichia.Shigella"
#survival_df_genus<-survival_df_genus[,c(2, 30, 31,8, 13,20,27,19,18,32:60)]
names(survival_df_genus)
##  [1] "Row.names"                     "feature.id"                   
##  [3] "IndividID"                     "SampleDate"                   
##  [5] "SampleTime"                    "Storage"                      
##  [7] "Seq_run"                       "Imtechella"                   
##  [9] "Allobacillus"                  "Seq_depth"                    
## [11] "scaling_factor"                "Seq_depth_nospikein"          
## [13] "BirthDate"                     "DeathDate"                    
## [15] "AgeAtSampling"                 "GroupAtSampling"              
## [17] "Condition_weight"              "SocialStatus"                 
## [19] "Temp_max"                      "sum_rainfall_month"           
## [21] "HoursAfterSunrise"             "TimeCat"                      
## [23] "Year"                          "month"                        
## [25] "Season"                        "TB_status"                    
## [27] "TB_exposure"                   "TB_resistance"                
## [29] "TB_survival"                   "TB_symptom_date"              
## [31] "TB_exposure_date"              "TB_stage"                     
## [33] "TotalAbundance"                "Time_180"                     
## [35] "Survival_180"                  "FollowUp_180"                 
## [37] "Helicobacter"                  "Geodermatophilus"             
## [39] "Fusobacterium"                 "Tyzzerella.4"                 
## [41] "X.Ruminococcus..torques.group" "Blautia"                      
## [43] "Marvinbryantia"                "Lachnoclostridium"            
## [45] "Lachnospiraceae.NK4A136.group" "Rikenellaceae.RC9.gut.group"  
## [47] "Bacteroides"                   "Alloprevotella"               
## [49] "Parabacteroides"               "Paeniclostridium"             
## [51] "Romboutsia"                    "Peptoclostridium"             
## [53] "Collinsella"                   "Enterococcus"                 
## [55] "Pediococcus"                   "Weissella"                    
## [57] "Bacillus"                      "Turicibacter"                 
## [59] "Catenisphaera"                 "Lactococcus"                  
## [61] "Clostridium.sensu.stricto.1"   "Peptococcus"                  
## [63] "Phascolarctobacterium"         "Ruminococcaceae.UCG.014"      
## [65] "Slackia"                       "Escherichia.Shigella"
survival_df_genus$AgeAtSampling<-scale(log10(survival_df_genus$AgeAtSampling))
survival_df_genus$Condition_weight<-scale(survival_df_genus$Condition_weight)
survival_df_genus$IndividID<-as.character(survival_df_genus$IndividID)
survival_df_genus$Season <-case_when(survival_df_genus$Season == "WET" ~ "Wet",
                                     survival_df_genus$Season == "DRY" ~ "Dry")
names(survival_df_genus)[15]<-"Age"

survival_df_genus<-subset(survival_df_genus, !is.na(Survival_180))
dim(survival_df_genus)
## [1] 1141   66
#survival_df_genus$Survival<-Surv(survival_df_genus$FollowUp_180, survival_df_genus$Survival_180)


frmla <- as.formula(paste("Surv(FollowUp_180, Survival_180)", 
                          paste(names(survival_df_genus)[5:38],  sep = "", 
                                collapse = " + "), sep = " ~ "))

frmla <-update.formula(frmla, ~ . + (1|IndividID))



fit_genus <- coxme::coxme(Surv(FollowUp_180, Survival_180) ~ 
                            Age + 
                            Season + 
                            TB_stage + 
                            SocialStatus +
                            Condition_weight + 
                            Fusobacterium+
                            Bacteroides+
                            Parabacteroides+
                            Phascolarctobacterium+
                            Helicobacter + 
                            Alloprevotella+
                            Enterococcus + 
                            Pediococcus + 
                            Lactococcus+
                            Peptoclostridium+
                            Collinsella+
                            Slackia+
                            
                            (1 | IndividID),
                          data = survival_df_genus)


fit_genus
## Cox mixed-effects model fit by maximum likelihood
##   Data: survival_df_genus
##   events, n = 330, 1141
##   Iterations= 24 150 
##                     NULL Integrated    Fitted
## Log-likelihood -2270.128  -2065.913 -1911.819
## 
##                    Chisq     df p    AIC    BIC
## Integrated loglik 408.43  19.00 0 370.43 298.25
##  Penalized loglik 716.62 133.39 0 449.84 -56.92
## 
## Model:  Surv(FollowUp_180, Survival_180) ~ Age + Season + TB_stage +      SocialStatus + Condition_weight + Fusobacterium + Bacteroides +      Parabacteroides + Phascolarctobacterium + Helicobacter +      Alloprevotella + Enterococcus + Pediococcus + Lactococcus +      Peptoclostridium + Collinsella + Slackia + (1 | IndividID) 
## Fixed coefficients
##                                 coef exp(coef)   se(coef)     z       p
## Age                      1.578270076 4.8465644 0.12349691 12.78 0.0e+00
## SeasonWet               -0.345947098 0.7075499 0.13207267 -2.62 8.8e-03
## TB_stageExposed          1.468404417 4.3423011 0.21576219  6.81 1.0e-11
## TB_stageDiseased         2.253788964 9.5237527 0.26572017  8.48 0.0e+00
## SocialStatusSubordinate  0.716211386 2.0466645 0.21278473  3.37 7.6e-04
## Condition_weight        -0.280096365 0.7557109 0.06493959 -4.31 1.6e-05
## Fusobacterium            0.079388592 1.0826249 0.08837575  0.90 3.7e-01
## Bacteroides             -0.089877390 0.9140432 0.10342463 -0.87 3.8e-01
## Parabacteroides          0.019271270 1.0194582 0.09393135  0.21 8.4e-01
## Phascolarctobacterium   -0.128020564 0.8798353 0.09877150 -1.30 1.9e-01
## Helicobacter            -0.052896360 0.9484783 0.07958094 -0.66 5.1e-01
## Alloprevotella           0.082769014 1.0862909 0.07785428  1.06 2.9e-01
## Enterococcus            -0.090187680 0.9137597 0.07411484 -1.22 2.2e-01
## Pediococcus             -0.102416622 0.9026534 0.07462879 -1.37 1.7e-01
## Lactococcus             -0.092793469 0.9113817 0.07505914 -1.24 2.2e-01
## Peptoclostridium        -0.007256337 0.9927699 0.07105360 -0.10 9.2e-01
## Collinsella              0.112057996 1.1185777 0.08414084  1.33 1.8e-01
## Slackia                 -0.092482014 0.9116656 0.08569759 -1.08 2.8e-01
## 
## Random effects
##  Group     Variable  Std Dev  Variance
##  IndividID Intercept 1.045863 1.093830
sjPlot::plot_model(fit_genus, axis.lim = c(0.8,20), vline.color = "darkgrey", sort.est = T)+  theme_bw(base_size = 16)+ggtitle("")+ylab("Hazard ratio")

#options(na.action = "na.fail") 

#dredge_genus_df<- MuMIn::dredge(fit_genus, fixed = c("Age", "Season", "TB_stage",
# "SocialStatus", "Condition_weight"))

#write.csv(dredge_genus_df, "dredge_results_genus.csv")


######### 2) Network approach ############
######### 2) Network approach ############
######### 2) Network approach ############
######### 2) Network approach ############


pseq_network<-genus_core
head(cluster_memb)
tail(cluster_memb)
pseq_network@tax_table@.Data[,1]<-cluster_memb$Cluster2

pseq_network@tax_table@.Data
##                               Kingdom                Phylum              
## Helicobacter                  "Bacteroides cluster"  "Epsilonbacteraeota"
## Geodermatophilus              "Bacillus cluster"     "Actinobacteria"    
## Fusobacterium                 "Bacteroides cluster"  "Fusobacteria"      
## Tyzzerella 4                  "Bacteroides cluster"  "Firmicutes"        
## [Ruminococcus] torques group  "Blautia cluster"      "Firmicutes"        
## Blautia                       "Blautia cluster"      "Firmicutes"        
## Marvinbryantia                "Blautia cluster"      "Firmicutes"        
## Lachnoclostridium             "Blautia cluster"      "Firmicutes"        
## Lachnospiraceae NK4A136 group "Bacteroides cluster"  "Firmicutes"        
## Rikenellaceae RC9 gut group   "Bacteroides cluster"  "Bacteroidetes"     
## Bacteroides                   "Bacteroides cluster"  "Bacteroidetes"     
## Alloprevotella                "Bacteroides cluster"  "Bacteroidetes"     
## Parabacteroides               "Bacteroides cluster"  "Bacteroidetes"     
## Paeniclostridium              "Clostridium cluster"  "Firmicutes"        
## Romboutsia                    "Clostridium cluster"  "Firmicutes"        
## Peptoclostridium              "Peptoclostridium"     "Firmicutes"        
## Collinsella                   "Blautia cluster"      "Actinobacteria"    
## Enterococcus                  "Lactococcus cluster"  "Firmicutes"        
## Pediococcus                   "Lactococcus cluster"  "Firmicutes"        
## Weissella                     "Lactococcus cluster"  "Firmicutes"        
## Bacillus                      "Bacillus cluster"     "Firmicutes"        
## Turicibacter                  "Bacillus cluster"     "Firmicutes"        
## Catenisphaera                 "Blautia cluster"      "Firmicutes"        
## Lactococcus                   "Lactococcus cluster"  "Firmicutes"        
## Clostridium sensu stricto 1   "Clostridium cluster"  "Firmicutes"        
## Peptococcus                   "Blautia cluster"      "Firmicutes"        
## Phascolarctobacterium         "Bacteroides cluster"  "Firmicutes"        
## Ruminococcaceae UCG-014       "Ruminococcus UCG-014" "Firmicutes"        
## Slackia                       "Blautia cluster"      "Actinobacteria"    
## Escherichia-Shigella          "Escherichia-Shigella" "Proteobacteria"    
##                               Class                 Order               
## Helicobacter                  "Campylobacteria"     "Campylobacterales" 
## Geodermatophilus              "Actinobacteria"      "Frankiales"        
## Fusobacterium                 "Fusobacteriia"       "Fusobacteriales"   
## Tyzzerella 4                  "Clostridia"          "Clostridiales"     
## [Ruminococcus] torques group  "Clostridia"          "Clostridiales"     
## Blautia                       "Clostridia"          "Clostridiales"     
## Marvinbryantia                "Clostridia"          "Clostridiales"     
## Lachnoclostridium             "Clostridia"          "Clostridiales"     
## Lachnospiraceae NK4A136 group "Clostridia"          "Clostridiales"     
## Rikenellaceae RC9 gut group   "Bacteroidia"         "Bacteroidales"     
## Bacteroides                   "Bacteroidia"         "Bacteroidales"     
## Alloprevotella                "Bacteroidia"         "Bacteroidales"     
## Parabacteroides               "Bacteroidia"         "Bacteroidales"     
## Paeniclostridium              "Clostridia"          "Clostridiales"     
## Romboutsia                    "Clostridia"          "Clostridiales"     
## Peptoclostridium              "Clostridia"          "Clostridiales"     
## Collinsella                   "Coriobacteriia"      "Coriobacteriales"  
## Enterococcus                  "Bacilli"             "Lactobacillales"   
## Pediococcus                   "Bacilli"             "Lactobacillales"   
## Weissella                     "Bacilli"             "Lactobacillales"   
## Bacillus                      "Bacilli"             "Bacillales"        
## Turicibacter                  "Erysipelotrichia"    "Erysipelotrichales"
## Catenisphaera                 "Erysipelotrichia"    "Erysipelotrichales"
## Lactococcus                   "Bacilli"             "Lactobacillales"   
## Clostridium sensu stricto 1   "Clostridia"          "Clostridiales"     
## Peptococcus                   "Clostridia"          "Clostridiales"     
## Phascolarctobacterium         "Negativicutes"       "Selenomonadales"   
## Ruminococcaceae UCG-014       "Clostridia"          "Clostridiales"     
## Slackia                       "Coriobacteriia"      "Coriobacteriales"  
## Escherichia-Shigella          "Gammaproteobacteria" "Enterobacteriales" 
##                               Family                 
## Helicobacter                  "Helicobacteraceae"    
## Geodermatophilus              "Geodermatophilaceae"  
## Fusobacterium                 "Fusobacteriaceae"     
## Tyzzerella 4                  "Lachnospiraceae"      
## [Ruminococcus] torques group  "Lachnospiraceae"      
## Blautia                       "Lachnospiraceae"      
## Marvinbryantia                "Lachnospiraceae"      
## Lachnoclostridium             "Lachnospiraceae"      
## Lachnospiraceae NK4A136 group "Lachnospiraceae"      
## Rikenellaceae RC9 gut group   "Rikenellaceae"        
## Bacteroides                   "Bacteroidaceae"       
## Alloprevotella                "Prevotellaceae"       
## Parabacteroides               "Tannerellaceae"       
## Paeniclostridium              "Peptostreptococcaceae"
## Romboutsia                    "Peptostreptococcaceae"
## Peptoclostridium              "Peptostreptococcaceae"
## Collinsella                   "Coriobacteriaceae"    
## Enterococcus                  "Enterococcaceae"      
## Pediococcus                   "Lactobacillaceae"     
## Weissella                     "Leuconostocaceae"     
## Bacillus                      "Bacillaceae"          
## Turicibacter                  "Erysipelotrichaceae"  
## Catenisphaera                 "Erysipelotrichaceae"  
## Lactococcus                   "Streptococcaceae"     
## Clostridium sensu stricto 1   "Clostridiaceae 1"     
## Peptococcus                   "Peptococcaceae"       
## Phascolarctobacterium         "Acidaminococcaceae"   
## Ruminococcaceae UCG-014       "Ruminococcaceae"      
## Slackia                       "Eggerthellaceae"      
## Escherichia-Shigella          "Enterobacteriaceae"   
##                               Genus                           Species
## Helicobacter                  "Helicobacter"                  NA     
## Geodermatophilus              "Geodermatophilus"              NA     
## Fusobacterium                 "Fusobacterium"                 NA     
## Tyzzerella 4                  "Tyzzerella 4"                  NA     
## [Ruminococcus] torques group  "[Ruminococcus] torques group"  NA     
## Blautia                       "Blautia"                       NA     
## Marvinbryantia                "Marvinbryantia"                NA     
## Lachnoclostridium             "Lachnoclostridium"             NA     
## Lachnospiraceae NK4A136 group "Lachnospiraceae NK4A136 group" NA     
## Rikenellaceae RC9 gut group   "Rikenellaceae RC9 gut group"   NA     
## Bacteroides                   "Bacteroides"                   NA     
## Alloprevotella                "Alloprevotella"                NA     
## Parabacteroides               "Parabacteroides"               NA     
## Paeniclostridium              "Paeniclostridium"              NA     
## Romboutsia                    "Romboutsia"                    NA     
## Peptoclostridium              "Peptoclostridium"              NA     
## Collinsella                   "Collinsella"                   NA     
## Enterococcus                  "Enterococcus"                  NA     
## Pediococcus                   "Pediococcus"                   NA     
## Weissella                     "Weissella"                     NA     
## Bacillus                      "Bacillus"                      NA     
## Turicibacter                  "Turicibacter"                  NA     
## Catenisphaera                 "Catenisphaera"                 NA     
## Lactococcus                   "Lactococcus"                   NA     
## Clostridium sensu stricto 1   "Clostridium sensu stricto 1"   NA     
## Peptococcus                   "Peptococcus"                   NA     
## Phascolarctobacterium         "Phascolarctobacterium"         NA     
## Ruminococcaceae UCG-014       "Ruminococcaceae UCG-014"       NA     
## Slackia                       "Slackia"                       NA     
## Escherichia-Shigella          "Escherichia-Shigella"          NA
pseq_network2<-tax_glom(pseq_network, taxrank = "Kingdom")
pseq_network2<-microbiome::transform(pseq_network2, "clr")
taxa_names(pseq_network2)
## [1] "Blautia"                     "Bacteroides"                
## [3] "Peptoclostridium"            "Enterococcus"               
## [5] "Bacillus"                    "Clostridium sensu stricto 1"
## [7] "Ruminococcaceae UCG-014"     "Escherichia-Shigella"
taxa_names(pseq_network2)<-c("Blautia_cluster", "Bacteroides_cluster", "Peptoclostridium", "Lactococcus_cluster", "Bacillus_cluster", "Clostridium_cluster", "Ruminococcus_UCG-014", "Escherichia-Shigella")


otutable<-data.frame(t(otu_table(pseq_network2)))
otutable[1:5, 1:5]
otutable<-data.frame(scale(otutable))
#hist(otutable$Helicobacter)

survival_df_network<-merge(survival_df, otutable, by = 0)



#############
#############
#############
#############

names(survival_df_network)
##  [1] "Row.names"            "feature.id"           "IndividID"           
##  [4] "SampleDate"           "SampleTime"           "Storage"             
##  [7] "Seq_run"              "Imtechella"           "Allobacillus"        
## [10] "Seq_depth"            "scaling_factor"       "Seq_depth_nospikein" 
## [13] "BirthDate"            "DeathDate"            "AgeAtSampling"       
## [16] "GroupAtSampling"      "Condition_weight"     "SocialStatus"        
## [19] "Temp_max"             "sum_rainfall_month"   "HoursAfterSunrise"   
## [22] "TimeCat"              "Year"                 "month"               
## [25] "Season"               "TB_status"            "TB_exposure"         
## [28] "TB_resistance"        "TB_survival"          "TB_symptom_date"     
## [31] "TB_exposure_date"     "TB_stage"             "TotalAbundance"      
## [34] "Time_180"             "Survival_180"         "FollowUp_180"        
## [37] "Blautia_cluster"      "Bacteroides_cluster"  "Peptoclostridium"    
## [40] "Lactococcus_cluster"  "Bacillus_cluster"     "Clostridium_cluster" 
## [43] "Ruminococcus_UCG.014" "Escherichia.Shigella"
#survival_df_network<-survival_df_network[,c(2, 8,30, 31, 13,20,27,19,18,32:38)]
#names(survival_df_network)
survival_df_network$AgeAtSampling<-scale(log10(survival_df_network$AgeAtSampling))
survival_df_network$Condition_weight<-scale(survival_df_network$Condition_weight)
survival_df_network$IndividID<-as.character(survival_df_network$IndividID)

survival_df_network$Season <-case_when(survival_df_network$Season == "WET" ~ "Wet",
                                       survival_df_network$Season == "DRY" ~ "Dry")
names(survival_df_network)[15]<-"Age"

survival_df_network<-subset(survival_df_network, !is.na(Survival_180))
dim(survival_df_network)
## [1] 1141   44
#survival_df_network$Survival<-Surv(survival_df_network$FollowUp_180, survival_df_network$Survival_180)


frmla <- as.formula(paste("Surv(FollowUp_180, Survival_180)", 
                          paste(names(survival_df_network)[5:16],  sep = "", 
                                collapse = " + "), sep = " ~ "))

frmla <-update.formula(frmla, ~ . + (1|IndividID))


fit_network <- coxme::coxme(Surv(FollowUp_180, Survival_180) ~ Age + 
                              Season + 
                              TB_stage + 
                              SocialStatus + 
                              Condition_weight + 
                              Blautia_cluster + 
                              Bacteroides_cluster + 
                              Peptoclostridium + 
                              Lactococcus_cluster + 
                              Bacillus_cluster + 
                              Clostridium_cluster + 
                              Ruminococcus_UCG.014  + 
                              (1 | IndividID),
                            data = survival_df_network)

summary(fit_network)
## Cox mixed-effects model fit by maximum likelihood
##   Data: survival_df_network
##   events, n = 330, 1141
##   Iterations= 23 144 
##                     NULL Integrated    Fitted
## Log-likelihood -2270.128  -2068.655 -1911.604
## 
##                    Chisq    df p    AIC    BIC
## Integrated loglik 402.95  14.0 0 374.95 321.76
##  Penalized loglik 717.05 130.5 0 456.04 -39.75
## 
## Model:  Surv(FollowUp_180, Survival_180) ~ Age + Season + TB_stage +      SocialStatus + Condition_weight + Blautia_cluster + Bacteroides_cluster +      Peptoclostridium + Lactococcus_cluster + Bacillus_cluster +      Clostridium_cluster + Ruminococcus_UCG.014 + (1 | IndividID) 
## Fixed coefficients
##                                coef exp(coef)   se(coef)     z       p
## Age                      1.58457919 4.8772386 0.12297031 12.89 0.0e+00
## SeasonWet               -0.37548490 0.6869561 0.13091206 -2.87 4.1e-03
## TB_stageExposed          1.48157845 4.3998852 0.21751584  6.81 9.7e-12
## TB_stageDiseased         2.30012696 9.9754488 0.26435469  8.70 0.0e+00
## SocialStatusSubordinate  0.73367572 2.0827221 0.21348281  3.44 5.9e-04
## Condition_weight        -0.27935861 0.7562687 0.06611843 -4.23 2.4e-05
## Blautia_cluster          0.00471949 1.0047306 0.06993167  0.07 9.5e-01
## Bacteroides_cluster     -0.02101547 0.9792038 0.09795373 -0.21 8.3e-01
## Peptoclostridium        -0.04365207 0.9572870 0.08516750 -0.51 6.1e-01
## Lactococcus_cluster     -0.16382231 0.8488928 0.08240857 -1.99 4.7e-02
## Bacillus_cluster         0.03871740 1.0394767 0.08558698  0.45 6.5e-01
## Clostridium_cluster     -0.09186031 0.9122326 0.08657432 -1.06 2.9e-01
## Ruminococcus_UCG.014    -0.05779993 0.9438388 0.07910965 -0.73 4.7e-01
## 
## Random effects
##  Group     Variable  Std Dev  Variance
##  IndividID Intercept 1.064405 1.132957
sjPlot::plot_model(fit_network, axis.lim = c(0.8,11), 
                   vline.color = "darkgrey", sort.est = T, 
                   dot.size= 4, line.size = 1.5, show.p = T)+  
  theme_bw(base_size = 20)+ggtitle("")+ylab("Hazard ratio")

This chunk took 0.34 minutes

13 Longitudinal analysis

  • Keep only individuals that were sampled across all TB infection stages
table(sample_data(meerkat_filtered)$TB_status)
## 
## Unexposed   Exposed  Diseased 
##       140       524       477
table(sample_data(meerkat_filtered)$TB_stage)
## 
## Unexposed   Exposed  Diseased 
##       421       630        90
ps_infected<-subset_samples(meerkat_filtered, TB_status !="Unexposed")

tb_df<-data.frame(table(sample_data(ps_infected)$IndividID, sample_data(ps_infected)$TB_stage))
tb_df2<-subset(tb_df, Var2 == "Diseased")
tb_df3<- subset(tb_df2, Freq>0)
id_keep<-as.character(tb_df3$Var1)

sample_data(ps_infected)$Keep<-sample_data(ps_infected)$IndividID %in% id_keep

ps_infected2<- subset_samples(ps_infected, Keep == T)

tb_df<-data.frame(table(sample_data(ps_infected)$IndividID, sample_data(ps_infected)$TB_stage))
tb_df2<-subset(tb_df, Var2 == "Unexposed")
tb_df3<- subset(tb_df2, Freq>0)
id_keep<-as.character(tb_df3$Var1)


sample_data(ps_infected2)$Keep<-sample_data(ps_infected2)$IndividID %in% id_keep

ps_infected3<- subset_samples(ps_infected2, Keep == T)

length(unique(sample_data(ps_infected3)$IndividID))
## [1] 11
table(sample_data(ps_infected3)$TB_stage)
## 
## Unexposed   Exposed  Diseased 
##        50        15        21
ggplot(sample_data(ps_infected3), aes(x = SampleDate, y = IndividID, group = IndividID))+geom_point(aes(col = TB_stage), size = 3)+geom_line()+ theme_bw()

This chunk took 0.1 minutes

  • Plot around exposure date
sample_data(ps_infected3)$Days_exposure<- sample_data(ps_infected3)$SampleDate - sample_data(ps_infected3)$TB_exposure_date
sample_data(ps_infected3)$BeforeExposure <- sample_data(ps_infected3)$SampleDate < sample_data(ps_infected3)$TB_exposure_date
table(sample_data(ps_infected3)$BeforeExposure)
## 
## FALSE  TRUE 
##    36    50
ggplot(sample_data(ps_infected3), aes(x = Days_exposure, y = IndividID, group = IndividID))+geom_point(aes(col = TB_stage), size = 3)+geom_line()+geom_vline(xintercept = 0, linetype = "dashed")

This chunk took 0.02 minutes

13.0.1 Repeat GAM models

genus_ps<-tax_glom(ps_infected3, taxrank = "Genus")
#genus_ps<-tax_glom(subset_samples(ps_infected3, Storage == "FREEZEDRIED"), taxrank = "Genus")

genus_abundances_df<-data.frame(taxa_sums(genus_ps))

genus_abundances_df$ASV<-row.names(genus_abundances_df)

taxonomy<-data.frame(tax_table(genus_ps))
taxonomy$ASV<-row.names(taxonomy)
sample_data(genus_ps)<-sample_data(genus_ps)[,c( "feature.id")]

top_genera_melt<-psmelt(genus_ps)


# make a category for rare genera

top_genera_melt$Genus2<-ifelse(top_genera_melt$OTU %in% top_genera, top_genera_melt$Genus, "Rare taxa")

head(top_genera_melt)
top_genera_melt2<-top_genera_melt %>% group_by(Sample, Genus2) %>% summarize(Abundance2 = sum(Abundance))


metadata<-data.frame(sample_data(ps_infected3))


top_genera_melt3<- merge(top_genera_melt2, metadata, by.x = "Sample", by.y = "feature.id")

This chunk took 0.05 minutes

TB_estimates<-list()
Rainfall_estimates<-list()

taxanames<-unique(top_genera_melt3$Genus2)

for (i in 1:length(taxanames)){
  tryCatch({ #catch errors
    print(i)
    
    taxa1<-subset(top_genera_melt3, Genus2 == taxanames[i])
    # taxa1<-subset(top_genera_melt3, Genus2 == "Bacteroides")
    
    metadata<-taxa1
    
    
    #scale variables
    metadata$TotalAbundance<-as.numeric(scale(log10(metadata$TotalAbundance)))
    metadata$Rainfall<-as.numeric(scale(log10(metadata$sum_rainfall_month+1)))
    metadata$TB_stage<-factor(metadata$TB_stage, levels = c("Unexposed", "Exposed","Diseased"))
    
    metadata$Year<-factor(metadata$Year)
    
    # fit gamm    
    m_taxa <- mgcv::bam(log10(Abundance2+1)~
                          s(HoursAfterSunrise, bs = "cr")+
                          s(TotalAbundance, bs = "cr")+
                          TB_stage+
                          Rainfall+
                          s(IndividID, bs = "re"),
                        #correlation = corARMA(form = ~ 1|Year, p = 3),
                        data=metadata, 
                        family = gaussian)
    
    
    summary<-summary(m_taxa)
    
    ## TB estimates 
    
    TB_estimates_df<-data.frame(summary$p.table)[1:3,]
    TB_estimates_df$Level<-c("Unexposed", "Exposed", "Diseased")
    names(TB_estimates_df)[4]<-"P_val"
    
    TB_estimates_df$Estimate2<-ifelse(TB_estimates_df$Level == "Unexposed", 0, TB_estimates_df$Estimate)
    
    
    TB_estimates_df$lower<-NA
    TB_estimates_df$upper<-NA
    
    # confidence intervals exposed
    beta <- coef(m_taxa)
    Vb <- vcov(m_taxa, unconditional = TRUE)
    se <- sqrt(diag(Vb))
    j <- which(names(beta) == "(Intercept)")
    cis<- beta[j] + (c(-1,1) * (2 * se[j]))
    
    TB_estimates_df[1,7]<-cis[1]
    TB_estimates_df[1,8]<-cis[2]
    
    # confidence intervals exposed
    j <- which(names(beta) == "TB_stageExposed")
    cis<- beta[j] + (c(-1,1) * (2 * se[j]))
    
    TB_estimates_df[2,7]<-cis[1]
    TB_estimates_df[2,8]<-cis[2]
    
    # confidence intervals symptomatic
    j <- which(names(beta) == "TB_stageDiseased")
    cis<- beta[j] + (c(-1,1) * (2 * se[j]))
    
    TB_estimates_df[3,7]<-cis[1]
    TB_estimates_df[3,8]<-cis[2]
    
    ## add coefficient
    
    TB_estimates_df$Coef <-as.numeric(coef(m_taxa)[1:3])
    TB_estimates_df$Taxa <-taxanames[i]
    TB_estimates[[i]]<-TB_estimates_df
    
    
    
    ## extract rainfall stats
    
    rainfall_estimates<- data.frame(summary$p.table)[4,]
    rainfall_estimates$Taxa<-taxanames[i]
    Rainfall_estimates[[i]]<-rainfall_estimates
    
    
  }, error=function(e){})
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27

This chunk took 0.03 minutes

names(TB_estimates)<-taxanames

TB_estimates_df<-do.call(rbind, TB_estimates)

TB_estimates_df$Significant <-TB_estimates_df$P_val < 0.05

# edit significance (intercept will always be significant)

sig_taxa<-subset(TB_estimates_df, Significant == T & Level !="Unexposed")
sig_taxa<-unique(sig_taxa$Taxa)

TB_estimates_df$Significant<- TB_estimates_df$Taxa %in%  sig_taxa


## edit confidence intervals

TB_estimates_df$Level<-factor(TB_estimates_df$Level, levels = c("Diseased", "Exposed", "Unexposed"))

TB_estimates_df$lower<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$lower-TB_estimates_df$Estimate, TB_estimates_df$lower)

TB_estimates_df$upper<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$upper-TB_estimates_df$Estimate, TB_estimates_df$upper)


# order taxa

TB_estimates_df$Taxa<-factor(TB_estimates_df$Taxa, levels = taxa_order)



######
#############
#############
#############

plot_tb_2<-ggplot(subset(TB_estimates_df, Taxa!= "uncultured"), aes( x = Estimate2, y = Taxa, group = Level))+
  theme_bw(base_size = 14)+
  geom_vline(xintercept = 0, linetype = "dashed")+
  geom_errorbarh(aes(col = Level, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, position=position_dodge(width=0.5))+
  
  geom_errorbarh(aes(alpha = Significant, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, col = "grey", position=position_dodge(width=0.5))+
  
  scale_alpha_discrete(range = c(1,0), guide = "none")+
  
  geom_point(aes(col = Level), size = 2, position=position_dodge(width=0.5))+
  
  scale_color_manual(values = c( "brown2", "skyblue","forestgreen"),  guide = guide_legend(reverse = TRUE))+
  xlim(c(-0.9,NA))+
  
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
  xlab("Effect size")+
  labs(col = "TB stage")+
  theme(plot.margin=unit(c(1,0.2,0.2,0.2),"cm"))+
  # legend
  theme( legend.background = element_blank(),
         legend.box.background = element_rect(colour = "grey", fill = "white"))+ 
  theme(legend.position=c(0.2,0.95),  legend.direction="vertical", 
        legend.key.height = unit(0.05, 'cm'),
        legend.key.width = unit(0.02, 'cm'),
        legend.title=element_text(size=8),
        legend.text =element_text(size=8))+
  guides(colour=guide_legend(title.position="top",                           title.hjust =0.5,reverse = TRUE))+
  coord_cartesian(xlim = c(-1, 1))+
  scale_x_continuous(breaks = c(-0.5, 0, 0.5))+
  theme(legend.margin=margin(t = c(0.1,0.1,0.1,0.1), unit='cm'))

This chunk took 0 minutes

names(Rainfall_estimates)<-taxanames

rainfall_estimates_df<-do.call(rbind, Rainfall_estimates)

rainfall_estimates_df$Significant <-rainfall_estimates_df$Pr...t.. < 0.05


rainfall_estimates_df$Taxa<-factor(rainfall_estimates_df$Taxa, levels = taxa_order)

plot_rainfall<-
  ggplot(subset(rainfall_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
  theme_bw(base_size = 14)+
  geom_point( aes(col = Significant), size = 3)+
  geom_errorbarh(aes(col = Significant, xmin = Estimate- Std..Error, xmax = Estimate+ Std..Error), height = 0, size = 1)+
  geom_vline(xintercept = 0, linetype = "dashed")+
  scale_color_manual(values = c("grey", "blue"))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
  xlab("Effect size")+
  # xlim(c(-0.004,NA))+
  #legend
  theme( legend.background = element_blank(),
         legend.box.background = element_rect(colour = "grey", fill = "white"))+
  theme(legend.position=c(0.8,0.95), legend.direction="vertical", 
        legend.key.height = unit(0.02, 'cm'),
        legend.key.width = unit(0.02, 'cm'),
        legend.title=element_text(size=8),
        legend.text =element_text(size=6))+
  guides(colour=guide_legend(title.position="top", reverse = T,
                             title.hjust =0.5))+
  coord_cartesian(xlim = c(-1, 1))+
  scale_x_continuous(breaks = c(-0.5, 0, 0.5))

This chunk took 0 minutes

# Combining plots
rm_legend <- function(p){p + theme(legend.position = "none")}

ggarrange(rm_legend(plot_tb_1), plot_tb_2, plot_rainfall, nrow = 1, align = "h", widths = c(2.2,1,1), labels = c("a) Year", "b) TB", "c) Rainfall"), label.x = c(0.35,-0.05,-0.15))

#

This chunk took 0.07 minutes